Automatically compare nested models from mice's glm.mids

你说的曾经没有我的故事 提交于 2019-12-06 11:07:30

You can use a loop to iterate through the different combinations of predictors, after arranging them in the order required for pool.compare.

So using your fake data from above - tweaked the number of categories

library(mice)
library(Hmisc)
# turn all the variables into factors
# turn all the variables into factors
fake <- nhanes
fake$age <- as.factor(nhanes$age)
fake$bmi <- cut2(nhanes$bmi, g=2) 
fake$chl <- cut2(nhanes$chl, g=2) 

# Impute
imput <- mice(fake, seed=1)

# Create models 
# - reduced models with one variable removed
# - full models with extra variables at end of expression
vars <- c("age", "bmi", "chl")

red <- combn(vars, length(vars)-1 , simplify=FALSE)
diffs <- lapply(red, function(i) setdiff(vars, i) )
(full <- lapply(1:length(red), function(i) 
                            paste(c(red[[i]], diffs[[i]]), collapse=" + ")))
#[[1]]
#[1] "age + bmi + chl"

#[[2]]
#[1] "age + chl + bmi"

#[[3]]
#[1] "bmi + chl + age"

(red <- combn(vars, length(vars)-1 , FUN=paste, collapse=" + "))
#[1] "age + bmi" "age + chl" "bmi + chl"

The models are now in the correct order to pass to the glm call. I've also replaced glm.mids method as it has been replaced by with.mids - see ?glm.mids

out <- vector("list", length(red))

for( i in 1:length(red)) {

  redMod <-  with(imput, 
               glm(formula(paste("(hyp==2) ~ ", red[[i]])), family = binomial))

  fullMod <-  with(imput, 
               glm(formula(paste("(hyp==2) ~ ", full[[i]])), family = binomial))

  out[[i]] <- list(predictors = diffs[[i]], 
                   pval = c(pool.compare(fullMod, redMod)$pvalue))
   }

do.call(rbind.data.frame, out)
#    predictors      pval
#2         chl 0.9976629
#21        bmi 0.9985028
#3         age 0.9815831

# Check manually by leaving out chl
mod1 <- with(imput, glm((hyp==2) ~ age + bmi + chl , family = binomial))
mod2 <- with(imput, glm((hyp==2) ~ age + bmi , family = binomial))
pool.compare(mod1, mod2)$pvalue
#         [,1]
#[1,] 0.9976629

You will get a lot of warnings using this dataset

EDIT

You could wrap this in a function

impGlmDrop1 <- function(vars, outcome, Data=imput,  Family="binomial") 
{

  red <- combn(vars, length(vars)-1 , simplify=FALSE)
  diffs <- lapply(red, function(i) setdiff(vars, i))
  full <- lapply(1:length(red), function(i) 
                      paste(c(red[[i]], diffs[[i]]), collapse=" + "))
  red <- combn(vars, length(vars)-1 , FUN=paste, collapse=" + ")

  out <- vector("list", length(red))
  for( i in 1:length(red)) {

  redMod <-  with(Data, 
              glm(formula(paste(outcome, red[[i]], sep="~")), family = Family))
  fullMod <-  with(Data, 
              glm(formula(paste(outcome, full[[i]], sep="~")), family = Family))
  out[[i]] <- list(predictors = diffs[[i]], 
                   pval = c(pool.compare(fullMod, redMod)$pvalue)  )
  }
  do.call(rbind.data.frame, out)
}

# Run
impGlmDrop1(c("age", "bmi", "chl"), "(hyp==2)")
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!