问题
I have a model that combines a dummy and a continuous variable to describe an outcome following a disturbance. So if there was a disturbance, I have time measurements at times 1:16 following the disturbance. If there was no disturbance in the recent past, the outcome is coded to a fake time value of -1. Here's a representation of the dataset:
library(lme4)
library(ggplot2)
df <- data.frame(ID = rep(c("a", "b", "c"), each = 20),
Time = c(1:16, -1, -1, -1, -1,
1:16, -1, -1, -1, -1,
1:16, -1, -1, -1, -1))
df$y <- 2 + 0.8*df$Time + 1*df$Time^2 + rnorm(30, 0, 3)
df[df$Time < 0,]$y <- rnorm(12, 5, 3)
df[df$ID == "b",]$y <- df[df$ID == "b",]$y + 5
df[df$ID == "c",]$y <- df[df$ID == "c",]$y - 5
df$Exposure <- "Before"
df[df$Time > 0,]$Exposure <- "After"
df$Exposure <- factor(df$Exposure, levels = c("Before", "After"))
ggplot(df[df$Time > 0,]) +
geom_point(aes(x = Time, y = y, colour = ID)) +
geom_point(data = df[df$Time < 0,], aes(x = -5, y = y, colour = ID))
What I'm after is comparing "no disturbance" estimate to various times post-disturbance to see when the difference becomes significant.
Prior to modeling, assign "no disturbance" data to a time of 0.
df[df$Time < 0,]$Time <- 0
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df)
# output estimates
newdata <- data.frame(Exposure = c("Before", "After", "After", "After", "After", "After"),
Time = c(0, 1, 4, 8, 12, 16))
newdata$Pred <- predict(m, re.form = NA, newdata = newdata)
## plot looks good
ggplot(df[df$Time > 0,]) +
geom_point(aes(x = Time, y = y, colour = ID)) +
geom_point(data = df[df$Time == 0,], aes(x = -5, y = y, colour = ID)) +
geom_line(data = newdata[newdata$Exposure == "After",],
aes(x = Time, y = Pred)) +
geom_point(data = newdata[newdata$Exposure == "Before",],
aes(x = -5, y = Pred), colour = "red")
How would I then compare, say, Before estimates to After estimates at Time==3
, Time == 6
, and Time == 9
, for example? Something like this would be great, but I can't figure out how to resolve the error I'm getting.
library(contrast)
library(multcomp)
cc <- contrast(m,
a = list(Time = 0, Exposure = "Before"),
b = list(Time = c(3, 6, 9), Exposure = "After"))
summary(glht(m, linfct = cc$X))
### UPDATE
Following rvl's excellent changes, I did a trial run on my actual data and ran into a new problem. My actual Time variable isn't an integer, but I want to make the predictions on an integer scale. When I update the toy example, the nesting seems to break:
df$Time <- df$Time + rnorm(60, 0, 0.5)
df[df$Exposure == "Before",]$Time <- -1.12
m <- lmer(y ~ Exposure + poly(Time, 2) + (1|ID), data = df)
# freshly installed emmeans from github
emm = emmeans(m, "Time", at = list(Time = c(0,3,6,9)))
emm ## no longer get the nesting info, and the preds aren't nested
In my own data (and using the at
specification, I actually only get a single line, for Time == 0
and Exposure == Before
, and that's it - nothing else in the output... any suggestions??
## UPDATE2
For some reason, the solution works with the toy example but not my own data... Here's a small subset of my dataset. The model fit is singular, but the issues I'm getting for emmeans
are the same as for my entire dataset... help?
df <- structure(list(ID = structure(c(2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L), .Label = c("B", "A"), class = "factor"),
Exposure = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No exposure", "Exposure"
), class = "factor"), Time = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 4.78757545912946, 9.63531173739354, 5.47889766247861,
7.17017886302881, 1.43155423003375, 3.72391354120779, 2.56353688399906,
8.29779117320654, 9.52304006615339, 9.48174174807695, 0.859601950498583,
4.63141168677387, 7.92347302279951, 7.92067346608815, 5.23250024053785,
5.57671787587839, 1.85126003367584, 3.1097216702916, 7.72389534567839,
9.36144591805227, 2.70213603445334, 1.84811002303022, 6.82448971585652,
7.88336338096561, 3.84031339520175, 5.62874085650497, 4.0972590990481,
2.09535527965164, 2.22160757456982, 7.35862943664427, 7.41826702411403,
8.24309337727667, 4.7943847267765, 5.8840472004994, 7.02963322046381
), Response = c(-7.16922413711838, 143.482571506177, 16.45347120693,
25.022565770909, -55.8024015971315, -124.925019624537, -16.4000310854958,
40.9499232825204, 2.46651714407957, -34.3558611547229, -80.1711009500979,
-58.5220697399603, 17.6390452197579, -11.2077688506688, 87.0618648836916,
113.611468732, -27.1400972587652, -30.0256851366867, -111.149731873181,
-24.2689502403869, -16.2737794106996, -125.618994529607,
95.9640135688539, 46.4163972081548, 6.72470222784859, -0.148508667228167,
-118.897875455802, 28.6093848128793, -57.5632050845714, 31.390260468939,
27.6826377837027, -40.7112943346364, -53.5934755706868, 27.0754421268185,
165.146183257597, 39.6762439690417, -9.74912218853661, 18.3454700992841,
33.8006770750647, -18.6013173700368, 12.7360264627221, 178.646948999019,
93.5496871933183, -8.68468960982507, 2.86668462850576)), row.names = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L,
29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L,
55L, 57L, 59L, 61L, 63L, 65L, 67L, 69L, 71L, 73L, 75L, 77L, 79L,
81L, 83L, 85L, 87L, 89L), class = c("tbl_df", "tbl", "data.frame"))
Running the model and emmeans:
m <- lmer(Response ~ Exposure + poly(Time, 2) + (1|ID), data = df)
## this only gives one row instead of 8?
emmeans(m, c("Time", "Exposure"), at = list(Time = c(0,3,6,9)))
## when I specify the nesting myself, I get a "multiple actual arguments" error...
emmeans(m, c("Time", "Exposure"), at = list(Time = c(0,3,6,9)),
nesting = "Time %in% Exposure")
回答1:
After your clarification, I think this will do the trick:
require(emmeans)
emm = emmeans(m, c("Time", "Exposure"),
at = list(Time = c(0,3,6,9)))
This creates eight predictions: four for exposure "After"
at times 0, 3, 6, 0, followed by "Before
" with the same four times (note that After comes before Before in the default alphabetical ordering of factor levels). Accordingly, I think the contrasts you need are obtainable by
contrast(emm, list(
c3 = c(0, 1, 0, 0, -1, 0, 0, 0),
c6 = c(0, 0, 1, 0, -1, 0, 0, 0),
c9 = c(0, 0, 0, 1, -1, 0, 0, 0)))
Addendum
In reality, this model has a nested structure with Time
nested in Exposure
. I discovered a bug in emmeans::ref_grid
that fails to detect this nesting when the nested "factor" is a covariate rather than a regular factor. With this now fixed (you'll need to install it from the github site), this is now much simpler to do, essentially reverting to my previous version of this answer:
> emm <- emmeans(m, "Time", cov.reduce = FALSE)
NOTE: A nesting structure was detected in the fitted model:
Time %in% Exposure
Specifying cov.reduce = FALSE
asks that all unique levels of all covariates be included. Alternatively (recommended if there are other covariates laying around) is to use at = list(Time = 0:17)
.
> emm
Time Exposure emmean SE df lower.CL upper.CL
0 Before 4.54321 2.817328 2.30 -6.18006 15.26648
1 After 5.28918 2.907673 2.61 -4.80080 15.37916
2 After 8.61589 2.823986 2.32 -2.05285 19.28462
3 After 14.01341 2.776795 2.17 2.92581 25.10101
4 After 21.48175 2.755698 2.11 10.18026 32.78323
5 After 31.02091 2.751049 2.09 19.66982 42.37199
6 After 42.63088 2.754742 2.10 31.31927 53.94250
7 After 56.31168 2.760612 2.12 45.06163 67.56173
8 After 72.06329 2.764565 2.13 60.85388 83.27270
9 After 89.88572 2.764565 2.13 78.67631 101.09513
10 After 109.77897 2.760612 2.12 98.52892 121.02903
11 After 131.74304 2.754742 2.10 120.43143 143.05466
12 After 155.77793 2.751049 2.09 144.42685 167.12901
13 After 181.88363 2.755698 2.11 170.58215 193.18512
14 After 210.06015 2.776795 2.17 198.97255 221.14776
15 After 240.30750 2.823986 2.32 229.63876 250.97623
16 After 272.62565 2.907673 2.61 262.53568 282.71563
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
Note that, though I asked for just Time
, Exposure
comes along too as kind of a "by" variable because it nests time
. Now, let's compare the first with each of the others:
> contrast(emm, "trt.vs.ctrl1")
contrast estimate SE df t.ratio p.value
1,After - 0,Before 0.74597 1.3643132 54 0.547 0.9953
2,After - 0,Before 4.07267 1.1754498 54 3.465 0.0137
3,After - 0,Before 9.47020 1.0570597 54 8.959 <.0001
4,After - 0,Before 16.93854 1.0003291 54 16.933 <.0001
5,After - 0,Before 26.47770 0.9874492 54 26.814 <.0001
6,After - 0,Before 38.08767 0.9976910 54 38.176 <.0001
7,After - 0,Before 51.76847 1.0137883 54 51.064 <.0001
8,After - 0,Before 67.52008 1.0245019 54 65.905 <.0001
9,After - 0,Before 85.34251 1.0245019 54 83.301 <.0001
10,After - 0,Before 105.23576 1.0137883 54 103.804 <.0001
11,After - 0,Before 127.19983 0.9976910 54 127.494 <.0001
12,After - 0,Before 151.23472 0.9874492 54 153.157 <.0001
13,After - 0,Before 177.34042 1.0003291 54 177.282 <.0001
14,After - 0,Before 205.51694 1.0570597 54 194.423 <.0001
15,After - 0,Before 235.76429 1.1754498 54 200.574 <.0001
16,After - 0,Before 268.08244 1.3643132 54 196.496 <.0001
P value adjustment: dunnettx method for 16 tests
Addendum 2
Re the update #2, the problem is that the nesting stuff doesn't work right unless you provide actual value that occur in the data. To illustrate (with the updated data and model):
> emmeans(m, c("Time", "Exposure"), at = list(Time = df$Time[c(1,15,25,35)]))
NOTE: A nesting structure was detected in the fitted model:
Time %in% Exposure
Time Exposure emmean SE df lower.CL upper.CL
0.000000 No exposure -1.027749 22.90015 12.81 -50.57545 48.51995
1.431554 Exposure -3.001869 29.90185 22.16 -64.98937 58.98563
5.232500 Exposure 19.464761 19.59438 5.42 -29.75007 68.67959
3.840313 Exposure 17.361564 18.56171 4.03 -34.01995 68.74308
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
The other part with providing the nesting explicitly appears to be a bug, which I need to fix.
Here's one way to work around it all: First, obtain the reference grid for combinations of Exposure
and Time
, suppressing the nesting (that does work in calls to ref_grid()
:
rg = ref_grid(m, at = list(Time = c(0,3,6,9)), nesting = NULL)
Then pick out the ones that make sense:
emm = rg[c(1,4,6,8)]
confint(emm)
... for which you get:
Exposure Time prediction SE df lower.CL upper.CL
No exposure 0 -1.027749 22.90015 12.81 -50.57545 48.51995
Exposure 3 12.665198 18.76906 4.18 -38.57825 63.90864
Exposure 6 17.596368 19.07591 5.03 -31.35612 66.54885
Exposure 9 -10.353097 24.21000 14.49 -62.11348 41.40728
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
Then, to get the comparisons you need:
contrast(emm, "trt.vs.ctrl1")
which produces:
contrast estimate SE df t.ratio p.value
Exposure,3 - No exposure,0 13.692947 28.36206 40.29 0.483 0.9033
Exposure,6 - No exposure,0 18.624117 28.68533 40.18 0.649 0.8257
Exposure,9 - No exposure,0 -9.325349 32.59268 40.01 -0.286 0.9669
P value adjustment: dunnettx method for 3 tests
Addendum 3
Here's a better workaround: Create a fake dataset that has the Time
values you want, and specify that dataset in the data
argument:
fakedf = df[c(1,21,23,25), ]
fakedf$Time = c(0,3,6,9)
emmeans(m, trt.vs.ctrl1 ~ Time, data = fakedf,
covnest = TRUE, cov.reduce = FALSE)
... which produces this output:
NOTE: A nesting structure was detected in the fitted model:
Time %in% Exposure
$`emmeans`
Time Exposure emmean SE df lower.CL upper.CL
0 No exposure -1.027749 22.90015 12.81 -50.57545 48.51995
3 Exposure 12.665198 18.76906 4.18 -38.57825 63.90864
6 Exposure 17.596368 19.07591 5.03 -31.35612 66.54885
9 Exposure -10.353097 24.21000 14.49 -62.11348 41.40728
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
3,Exposure - 0,No exposure 13.692947 28.36206 40.29 0.483 0.9033
6,Exposure - 0,No exposure 18.624117 28.68533 40.18 0.649 0.8257
9,Exposure - 0,No exposure -9.325349 32.59268 40.01 -0.286 0.9669
P value adjustment: dunnettx method for 3 tests
来源:https://stackoverflow.com/questions/53358555/post-hoc-comparison-of-point-on-slope-to-another-group