问题
I try to do 1104 linear regressions with the same model. My independent variable does not change. However, my dependant variable does. Indeed, I have 1104 dependent variables. I can only extract all the coefficients (intercepts included), t-stats and R-square stats. I would also like to extract all the p-values lists of each coefficients within the 1104 linear regressions. How to do that with an easy way ?
Here are my codes:
run 1104 regressions for M1
bigtest<-as.data.frame(bigtest)
test <- lapply(135:1238, function(i) lm(bigtest[,i]~bigtest[,"rm"]))
reg_sq <- sapply(1:length(test),function(i) summary(test[[i]])$r.squared)
#reg_sq
coefrm <- sapply(1:length(test),function(i)summary(test[[i]])$coefficients[2,1])
intercept <- sapply(1:length(test),function(i)summary(test[[i]])$coefficients[1,1])
#betas
tstatrm <- sapply(1:length(test),function(i) summary(test[[i]])$coefficients[2,3])
tstatint <- sapply(1:length(test),function(i) summary(test[[i]])$coefficients[1,3])
#tstat
m1 <- cbind(reg_sq,coefrm,intercept,tstatrm,tstatint)
resultsM1 <- as.data.frame(m1)
回答1:
Here's a tidyverse solution in multiple parts, hopefully easier to read that way :-) I used mtcars
as a play dataset with mpg
as the invariant independent variable
library(dplyr)
library(purrr)
library(broom)
library(tibble)
# first key change is let `broom::tidy` do the hard work
test2 <- lapply(2:10, function(i) broom::tidy(lm(mtcars[,i] ~ mtcars[,"mpg"])))
names(test2) <- names(mtcars[2:10])
basic_information <-
map2_df(test2,
names(test2),
~ mutate(.x, which_dependent = .y)) %>%
mutate(term = ifelse(term == "(Intercept)", "Intercept", "mpg")) %>%
select(which_dependent, everything())
basic_information
#> # A tibble: 18 x 6
#> which_dependent term estimate std.error statistic p.value
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 cyl Intercept 11.3 0.593 19.0 2.87e-18
#> 2 cyl mpg -0.253 0.0283 -8.92 6.11e-10
#> 3 disp Intercept 581. 41.7 13.9 1.26e-14
#> 4 disp mpg -17.4 1.99 -8.75 9.38e-10
#> 5 hp Intercept 324. 27.4 11.8 8.25e-13
#> 6 hp mpg -8.83 1.31 -6.74 1.79e- 7
#> 7 drat Intercept 2.38 0.248 9.59 1.20e-10
#> 8 drat mpg 0.0604 0.0119 5.10 1.78e- 5
#> 9 wt Intercept 6.05 0.309 19.6 1.20e-18
#> 10 wt mpg -0.141 0.0147 -9.56 1.29e-10
#> 11 qsec Intercept 15.4 1.03 14.9 2.05e-15
#> 12 qsec mpg 0.124 0.0492 2.53 1.71e- 2
#> 13 vs Intercept -0.678 0.239 -2.84 8.11e- 3
#> 14 vs mpg 0.0555 0.0114 4.86 3.42e- 5
#> 15 am Intercept -0.591 0.253 -2.33 2.64e- 2
#> 16 am mpg 0.0497 0.0121 4.11 2.85e- 4
#> 17 gear Intercept 2.51 0.411 6.10 1.05e- 6
#> 18 gear mpg 0.0588 0.0196 3.00 5.40e- 3
Just to change things up a bit... we'll use map
to construct formula
y <- 'mpg'
x <- names(mtcars[2:10])
models <- map(setNames(x, x),
~ lm(as.formula(paste(.x, y, sep="~")),
data=mtcars))
pvalues <-
data.frame(rsquared = unlist(map(models, ~ summary(.)$r.squared)),
RSE = unlist(map(models, ~ summary(.)$sigma))) %>%
rownames_to_column(var = "which_dependent")
results <- full_join(basic_information, pvalues)
#> Joining, by = "which_dependent"
results
# A tibble: 18 x 8
which_dependent term estimate std.error statistic p.value rsquared RSE
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 cyl Intercept 11.3 0.593 19.0 2.87e-18 0.726 0.950
2 cyl mpg -0.253 0.0283 -8.92 6.11e-10 0.726 0.950
3 disp Intercept 581. 41.7 13.9 1.26e-14 0.718 66.9
4 disp mpg -17.4 1.99 -8.75 9.38e-10 0.718 66.9
5 hp Intercept 324. 27.4 11.8 8.25e-13 0.602 43.9
6 hp mpg -8.83 1.31 -6.74 1.79e- 7 0.602 43.9
7 drat Intercept 2.38 0.248 9.59 1.20e-10 0.464 0.398
8 drat mpg 0.0604 0.0119 5.10 1.78e- 5 0.464 0.398
9 wt Intercept 6.05 0.309 19.6 1.20e-18 0.753 0.494
10 wt mpg -0.141 0.0147 -9.56 1.29e-10 0.753 0.494
11 qsec Intercept 15.4 1.03 14.9 2.05e-15 0.175 1.65
12 qsec mpg 0.124 0.0492 2.53 1.71e- 2 0.175 1.65
13 vs Intercept -0.678 0.239 -2.84 8.11e- 3 0.441 0.383
14 vs mpg 0.0555 0.0114 4.86 3.42e- 5 0.441 0.383
15 am Intercept -0.591 0.253 -2.33 2.64e- 2 0.360 0.406
16 am mpg 0.0497 0.0121 4.11 2.85e- 4 0.360 0.406
17 gear Intercept 2.51 0.411 6.10 1.05e- 6 0.231 0.658
18 gear mpg 0.0588 0.0196 3.00 5.40e- 3 0.231 0.658
回答2:
sapply(test, function(i) summary(i)$coefficients[-1, 4])
will get you the p-values. Note, I'm assuming you don't need the intercept. Also the sapply can be written a bit more cleanly that what you've been using.
Here's a small example:
y <- c(1.03, 2.05, 2.91, 4.07)
x1 <- c(2.1, 4.3, 5.8, 7.9)
x2 <- c(43, 17, 11, 7)
x3 <- c(5.1, 6.1, 5.5, 6.8)
df <- data.frame(y, x1, x2, x3)
# Fit models
fit <- lapply(df[,-1], function(x) lm(df$y~x))
# Extract pvalues with intercept
pval <- sapply(fit, function(x) summary(x)$coefficients[,4])
pval
Output:
x1 x2 x3
(Intercept) 0.311515551 0.02163118 0.3022066
x 0.001185388 0.09842442 0.1855516
# Without intercept
pval2 <- sapply(fit, function(x) summary(x)$coefficients[-1,4])
pval2
Output:
x1 x2 x3
0.001185388 0.098424425 0.185551567
来源:https://stackoverflow.com/questions/63907925/extract-lists-of-p-values-for-each-regression-coefficients-1104-linear-regressi