I am currently computing glm models on a huge data set. Both glm and even speedglm take days to compute.
I currently have ar
If you can get the coefficients, can't you just roll your own? This would not be a dataset-size issue:
# example data
set.seed(1)  # make the example reproducible
n <- 2000
dat <- data.frame(dv  = sample(0:1, size = n, replace = TRUE),
                  iv1 = sample(1:10, size = n, replace = TRUE),
                  iv2 = sample(1:10, size = n, replace = TRUE),
                  iv3 = sample(1:10, size = n, replace = TRUE),
                  iv4 = sample(0:10, size = n, replace = TRUE),
                  iv5 = as.factor(sample(0:1, size = n, replace = TRUE)),
                  x   = sample(1:100, size = n, replace = TRUE),
                  y   = as.factor(sample(0:1, size = n, replace = TRUE)),
                  ff1 = as.factor(sample(1:15, size = n, replace = TRUE)),
                  ff2 = as.factor(sample(1:100, size = n, replace = TRUE)))
mod1 <- glm(dv ~ iv1 + iv2 + iv3 + iv4 + iv5 + x * y + ff1 + ff2,
            family = binomial(link = "probit"), data = dat)
# coefficients for x, y and their interaction
x1 <- coef(mod1)['x']
y1 <- coef(mod1)['y1']
xy <- coef(mod1)['x:y1']
# linear predictors over the range of x, holding every other predictor at
# zero / its reference level (the intercept is omitted here as well)
x <- 1:100
a <- x1*x              # y = 0: contribution of x alone
b <- x1*x + y1 + xy*x  # y = 1: adds the y main effect and the interaction
plot(a ~ x, type = 'l', col = 'red', xlim = c(0, max(x)), ylim = range(c(a, b)))
lines(b ~ x, col = 'blue')
legend('topright', c('y = 0', 'y = 1'), col = c('red', 'blue'), lty = 1)
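Since the model uses a probit link, pnorm() is the inverse link that turns those linear predictors into probabilities. The same roll-your-own idea extends to scoring rows without calling predict(): build the design matrix yourself and multiply by the coefficient vector. This is a minimal sketch, not your exact workflow; newdat, X, eta, p0 and p1 are illustrative names, newdat just reuses a few rows of dat, and it assumes the fit has no aliased (NA) coefficients:

# probabilities for the two plotted lines, adding back the intercept
# (other predictors still held at zero / reference levels)
p0 <- pnorm(coef(mod1)['(Intercept)'] + a)
p1 <- pnorm(coef(mod1)['(Intercept)'] + b)

# hypothetical scoring without predict(): design matrix %*% coefficients
newdat <- dat[1:5, ]                             # stand-in for new rows to score
X <- model.matrix(formula(mod1), data = newdat)  # same formula, same contrasts
eta <- drop(X %*% coef(mod1))                    # linear predictor
p <- pnorm(eta)                                  # probit inverse link
# agrees with the built-in prediction on these rows
all.equal(unname(p), unname(predict(mod1, newdata = newdat, type = "response")))

On a huge data set you can do that matrix product in chunks, so nothing beyond the coefficient vector and one chunk of rows ever has to sit in memory at once.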