问题
Lets say i have 3 response variables A,C and M and i want to fit a model for all possible models ie fit Y ~ A, Y ~ C, Y ~ M, Y ~ A * C, Y ~ A * M, Y ~ C * M, etc. Is there a quick way to do this without manually specifiying the interactions each time?
i do not want to write
M1 = glm(Y ~ A , data = subs, family = "poisson")
M2 = glm(Y ~ C , data = subs, family = "poisson")
M3 = glm(Y ~ M , data = subs, family = "poisson")
M4 = glm(Y ~ A*C , data = subs, family = "poisson")
...
In reality i have more than 3 variables and would like some sort of loop, is this even possible. Thanks
回答1:
Here is a sort of functional programming approach.
You create your data, and as long as your Y
is the first column, this code would take all the rest of the variables (no matter how many) and construct models on their combinations.
Finally, since you've done it in this framework, you can call broom's tidy
and confint_tidy
to extract the results into an easy to filter dataset.
DF <- data_frame(Y = rpois(100, 5),
A = rnorm(100),
C = rnorm(100),
M = rnorm(100))
formula_frame <- bind_rows(data_frame(V1 = names(DF[,-1])),
as_data_frame(t(combn(names(DF[,-1]),2)))) %>%
rowwise() %>%
mutate(formula_text = paste0("Y ~", if_else(is.na(V2),
V1,
paste(V1,V2, sep = "*"))),
formula_obj = list(as.formula(formula_text))) %>%
ungroup()
formula_frame %>%
mutate(fits = map(formula_obj, ~glm(.x, family = "poisson", data = DF) %>%
(function(X)bind_cols(broom::tidy(X),broom::confint_tidy((X)))))) %>%
unnest(fits) %>%
select(-formula_obj)
# A tibble: 18 x 10
V1 V2 formula_text term estimate std.error statistic p.value conf.low conf.high
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A NA Y ~A (Intercept) 1.63 0.0443 36.8 6.92e-297 1.54 1.72
2 A NA Y ~A A 0.0268 0.0444 0.602 5.47e- 1 -0.0603 0.114
3 C NA Y ~C (Intercept) 1.63 0.0443 36.8 5.52e-296 1.54 1.72
4 C NA Y ~C C 0.0326 0.0466 0.699 4.84e- 1 -0.0587 0.124
5 M NA Y ~M (Intercept) 1.63 0.0454 35.8 1.21e-280 1.53 1.71
6 M NA Y ~M M -0.0291 0.0460 -0.634 5.26e- 1 -0.119 0.0615
7 A C Y ~A*C (Intercept) 1.62 0.0446 36.4 5.64e-290 1.54 1.71
8 A C Y ~A*C A 0.00814 0.0459 0.178 8.59e- 1 -0.0816 0.0982
9 A C Y ~A*C C 0.0410 0.0482 0.850 3.96e- 1 -0.0532 0.136
10 A C Y ~A*C A:C 0.0650 0.0474 1.37 1.70e- 1 -0.0270 0.158
11 A M Y ~A*M (Intercept) 1.62 0.0458 35.5 1.21e-275 1.53 1.71
12 A M Y ~A*M A 0.0232 0.0451 0.514 6.07e- 1 -0.0653 0.112
13 A M Y ~A*M M -0.0260 0.0464 -0.561 5.75e- 1 -0.116 0.0655
14 A M Y ~A*M A:M -0.00498 0.0480 -0.104 9.17e- 1 -0.0992 0.0887
15 C M Y ~C*M (Intercept) 1.60 0.0472 34.0 1.09e-253 1.51 1.70
16 C M Y ~C*M C 0.0702 0.0506 1.39 1.65e- 1 -0.0291 0.169
17 C M Y ~C*M M -0.0333 0.0479 -0.695 4.87e- 1 -0.127 0.0611
18 C M Y ~C*M C:M 0.0652 0.0377 1.73 8.39e- 2 -0.0102 0.138
回答2:
This should work:
glmulti::glmulti(
Y = "Y",
xr = c("A", "C", "M"),
data = subs,
filename = "my_results",
family = "poisson"
)
It will create a text file my_results.txt
with information about each of the models.
You can do it as a one-liner with other packages as well, leaps
, bestglm
, probably others.
回答3:
Putting all RHS variables in a v
ector and using combn
to get combinations of one and two (with lapply
). The formulae we get with reformulate
and paste
which collapses with *
. After combn
we need another lapply
to loop over the combinations.
v <- c("X1", "X2", "X3")
res <- lapply(1:2, function(n) {
.env <- environment()
cb <- combn(c("X1", "X2", "X3"), n, function(x) paste(x, collapse=" * "))
lapply(cb, function(cb) lm(reformulate(cb, "y", env=.env), dat))
})
Result
res
# [[1]]
# [[1]][[1]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X1
# -0.3433 0.3382
#
#
# [[1]][[2]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X2
# 0.008104 1.017076
#
#
# [[1]][[3]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X3
# 0.02774 1.04382
#
#
#
# [[2]]
# [[2]][[1]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X1 X2 X1:X2
# -0.577 1.408 1.157 0.296
#
#
# [[2]][[2]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X1 X3 X1:X3
# 0.7378 -0.6141 1.3707 -1.1076
#
#
# [[2]][[3]]
#
# Call:
# lm(formula = reformulate(cb, "y", env = .env), data = dat)
#
# Coefficients:
# (Intercept) X2 X3 X2:X3
# 0.257141 1.148571 1.290523 -0.009836
Data:
dat <- structure(list(X1 = c(1.37095844714667, -0.564698171396089, 0.363128411337339,
0.63286260496104, 0.404268323140999, -0.106124516091484, 1.51152199743894,
-0.0946590384130976, 2.01842371387704, -0.062714099052421), X2 = c(1.30486965422349,
2.28664539270111, -1.38886070111234, -0.278788766817371, -0.133321336393658,
0.635950398070074, -0.284252921416072, -2.65645542090478, -2.44046692857552,
1.32011334573019), X3 = c(-0.306638594078475, -1.78130843398,
-0.171917355759621, 1.2146746991726, 1.89519346126497, -0.4304691316062,
-0.25726938276893, -1.76316308519478, 0.460097354831271, -0.639994875960119
), y = c(2.8246396305329, 0.645476124553837, -0.162546123564699,
0.959822161909057, 2.67109557131028, -1.61765192870095, 0.185540684874441,
-5.36518513868917, -2.37615350981384, 0.653526977609908)), row.names = c(NA,
-10L), class = "data.frame")
来源:https://stackoverflow.com/questions/64104051/how-to-fit-multiple-interaction-models-in-a-loop