R: repeat linear regression for all variables and save results in a new data frame

前端 未结 3 985
醉话见心
醉话见心 2021-01-28 17:03

I have a data frame named “dat” with 10 numeric variables (var1, var2,var3,var4 , var5,…var 10), each with several observations…

dat

   var1 var2 var3 var4 var         


        
3条回答
  •  -上瘾入骨i
    2021-01-28 17:40

    You can try the following code to have the desired output

    data <- structure(list(var1 = c(12L, 3L, 13L, 17L, 9L, 15L, 12L, 3L, 
    13L), var2 = c(5L, 2L, 15L, 11L, 13L, 6L, 5L, 2L, 15L), var3 = c(18L, 
    10L, 14L, 16L, 8L, 20L, 18L, 10L, 14L), var4 = c(19L, 6L, 13L, 
    18L, 8L, 17L, 19L, 6L, 13L), var5 = c(12L, 13L, 1L, 10L, 7L, 
    3L, 12L, 13L, 1L), var6 = c(17L, 17L, 17L, 17L, 17L, 17L, 17L, 
    17L, 17L), var7 = c(11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L
    ), var8 = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), var9 = c(18L, 
    18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L), var10 = c(10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L)), class = "data.frame", row.names = c(NA, 
    -9L))
    
    head(data,2)
    #>   var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
    #> 1   12    5   18   19   12   17   11   16   18    10
    #> 2    3    2   10    6   13   17   11   16   18    10
    
    x = names(data[,-1])
    out <- unlist(lapply(1, function(n) combn(x, 1, FUN=function(row) paste0("var1 ~ ", paste0(row, collapse = "+")))))
    out
    #> [1] "var1 ~ var2"  "var1 ~ var3"  "var1 ~ var4"  "var1 ~ var5" 
    #> [5] "var1 ~ var6"  "var1 ~ var7"  "var1 ~ var8"  "var1 ~ var9" 
    #> [9] "var1 ~ var10"
    
    library(broom)
    #> Warning: package 'broom' was built under R version 3.5.3
    
    library(dplyr)
    #> Warning: package 'dplyr' was built under R version 3.5.3
    #> 
    #> Attaching package: 'dplyr'
    #> The following objects are masked from 'package:stats':
    #> 
    #>     filter, lag
    #> The following objects are masked from 'package:base':
    #> 
    #>     intersect, setdiff, setequal, union
    
    #To have the regression coefficients
    tmp1 = bind_rows(lapply(out, function(frml) {
     a = tidy(lm(frml, data=data))
     a$frml = frml
     return(a)
    }))
    head(tmp1)
    #> # A tibble: 6 x 6
    #>   term        estimate std.error statistic p.value frml       
    #>                                 
    #> 1 (Intercept)    6.46      2.78      2.33  0.0529  var1 ~ var2
    #> 2 var2           0.525     0.288     1.82  0.111   var1 ~ var2
    #> 3 (Intercept)   -1.50      4.47     -0.335 0.748   var1 ~ var3
    #> 4 var3           0.863     0.303     2.85  0.0247  var1 ~ var3
    #> 5 (Intercept)    0.649     2.60      0.250 0.810   var1 ~ var4
    #> 6 var4           0.766     0.183     4.18  0.00413 var1 ~ var4
    
    #To have the regression results i.e. R2, AIC, BIC
    tmp2 = bind_rows(lapply(out, function(frml) {
     a = glance(lm(frml, data=data))
     a$frml = frml
     return(a)
    }))
    head(tmp2)
    #> # A tibble: 6 x 12
    #>   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
    #>                               
    #> 1     0.321         0.224  4.33      3.31  0.111       2  -24.8  55.7  56.3
    #> 2     0.537         0.471  3.58      8.12  0.0247      2  -23.1  52.2  52.8
    #> 3     0.714         0.673  2.81     17.5   0.00413     2  -20.9  47.9  48.5
    #> 4     0.276         0.173  4.47      2.67  0.146       2  -25.1  56.2  56.8
    #> 5     0             0      4.92     NA    NA           1  -26.6  57.2  57.6
    #> 6     0             0      4.92     NA    NA           1  -26.6  57.2  57.6
    #> # ... with 3 more variables: deviance , df.residual , frml 
    
    write.csv(tmp1, "Try_lm_coefficients.csv")
    write.csv(tmp2, "Try_lm_results.csv")
    

    Created on 2019-11-20 by the reprex package (v0.3.0)

提交回复
热议问题