问题
I need to impute missing data and then coduct multinomial regression with the generated datasets. I have tried using mice for the imputing and then multinom function from nnet for the multnomial regression. But this gives me unreadable output. Here is an example using the nhanes2 dataset available with the mice package:
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('sample','pmm','logreg','norm'))
#age is categorical, bmi is continuous
m <- with(test, multinom(age ~ bmi, model = T))
summary(pool(m))
m1 <- with(test, lm(bmi ~ age, model = T))
summary(pool(m1))
With the code above, the output of 'm' lists the coefficients as 1, 2, 3 and 4 instead of their variable names. The output for m1 when using the lm command instead comes out correctly. This seems to be an issue with running the multinomial command on a mira object. Does anyone have any suggestions on how to fix this or suggestions for other imputation and multinomial regression methods that would work better?
回答1:
EDIT
This has now been updated on the development branch of mice
, and executes as expected: see https://github.com/stefvanbuuren/mice/issues/85
There are a couple of issues when using mulitnom
. The pool
method mixes up the ordering of the coefficients and standard errors - so (asaik) the object returned from using pool
is not correct (actually mixes up is a bit strong - there is no specific pool
method for multinom
models, and so it uses the default which doesn't quite work in this case) .
Also, as you mention, then names are dropped - this is due to pool
calling names(coef(modelobject))
, but multinom
models return a matrix of coefficients, and so don't have names
(they have rownames
& colnames
).
So you can alter the pool
function to cater for multinom
models - see pooly
below (actually you could write a much smaller function that just deals with this model class, but I chose to write a quick, more general approach which should hopefully not break for other model classes, but with caveat, I have not fully tested that it didn't.)
Test with your example
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('polyreg','pmm','logreg','norm'), print=0)
m <- with(test, multinom(age ~ bmi, model = T))
summary(pooly(m))
# est se t df Pr(>|t|) lo 95 hi 95 nmis fmi lambda
# 40-59:(Intercept) 5.8960594 4.5352921 1.300040 12.82882 0.21646037 -3.9151498 15.70726867 NA 0.2951384 0.1931975
# 40-59:bmi -0.2356516 0.1669807 -1.411250 12.81702 0.18198189 -0.5969156 0.12561248 NA 0.2955593 0.1935923
# 60-99:(Intercept) 8.2723321 4.7656701 1.735817 15.55876 0.10235284 -1.8537729 18.39843700 NA 0.1989371 0.1021831
# 60-99:bmi -0.3364014 0.1832718 -1.835533 15.03938 0.08627846 -0.7269469 0.05414413 NA 0.2174394 0.1198595
#
Compare to original which (I think) mixes up the coefs & se's
summary(pool(m))
Define function to accept multinom
models. The added code has comments next to it.
pooly <- function (object, method = "smallsample") {
call <- match.call()
if (!is.mira(object))
stop("The object must have class 'mira'")
m <- length(object$analyses)
fa <- getfit(object, 1)
if (m == 1) {
warning("Number of multiple imputations m=1. No pooling done.")
return(fa)
}
analyses <- getfit(object)
if (class(fa)[1] == "lme" && !requireNamespace("nlme", quietly = TRUE))
stop("Package 'nlme' needed fo this function to work. Please install it.",
call. = FALSE)
if ((class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) && !requireNamespace("lme4",
quietly = TRUE))
stop("Package 'lme4' needed fo this function to work. Please install it.",
call. = FALSE)
mess <- try(coef(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no coef() method.")
mess <- try(vcov(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no vcov() method.")
if (class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) {
k <- length(lme4::fixef(fa))
names <- names(lme4::fixef(fa))
}
else if (class(fa)[1] == "polr") {
k <- length(coef(fa)) + length(fa$zeta)
names <- c(names(coef(fa)), names(fa$zeta))
}
# added this ---------------------------------
else if (class(fa)[1] == "multinom") {
k <- length(coef(fa))
names <- rownames(vcov(fa))
}
# --------------------------------------------
else {
k <- length(coef(fa))
names <- names(coef(fa))
}
qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(seq_len(m),
names))
u <- array(NA, dim = c(m, k, k), dimnames = list(seq_len(m),
names, names))
for (i in seq_len(m)) {
fit <- analyses[[i]]
if (class(fit)[1] == "mer") {
qhat[i, ] <- lme4::fixef(fit)
ui <- as.matrix(vcov(fit))
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class mer, fixef(fit): ",
ncol(qhat), ", as.matrix(vcov(fit)): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lmerMod" || inherits(fa, "merMod")) {
qhat[i, ] <- lme4::fixef(fit)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lmerMod, fixed(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lme") {
qhat[i, ] <- fit$coefficients$fixed
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lme, fit$coefficients$fixef: ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "polr") {
qhat[i, ] <- c(coef(fit), fit$zeta)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class polr, c(coef(fit, fit$zeta): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "survreg") {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
parnames <- dimnames(ui)[[1]]
select <- !(parnames %in% "Log(scale)")
ui <- ui[select, select]
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class survreg, coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# added this block -------------------------------------
else if (class(fit)[1] == "multinom") {
qhat[i, ] <- c(t(coef(fit))) # transpose to get same order as standard errors
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class multinom, c(coef(fit)): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# ----------------------------------------------------
else {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
ui <- expandvcov(qhat[i, ], ui)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
}
qbar <- apply(qhat, 2, mean)
ubar <- apply(u, c(2, 3), mean)
e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
b <- (t(e) %*% e)/(m - 1)
t <- ubar + (1 + 1/m) * b
r <- (1 + 1/m) * diag(b/ubar)
lambda <- (1 + 1/m) * diag(b/t)
dfcom <- df.residual(object)
df <- mice.df(m, lambda, dfcom, method)
fmi <- (r + 2/(df + 3))/(r + 1)
names(r) <- names(df) <- names(fmi) <- names(lambda) <- names
fit <- list(call = call, call1 = object$call, call2 = object$call1,
nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar,
ubar = ubar, b = b, t = t, r = r, dfcom = dfcom, df = df,
fmi = fmi, lambda = lambda)
oldClass(fit) <- c("mipo", oldClass(object))
return(fit)
}
environment(pooly) <- environment(mice)
来源:https://stackoverflow.com/questions/46195370/multinomial-regression-on-imputed-data-with-mice-why-are-coefficient-names-not