问题
Using the Matching package (link to package here), we can work through a modified GenMatch example.
library(Matching)
data(lalonde)

# Introduce an ID variable so each row can still be identified after matching.
lalonde$ID <- seq_len(nrow(lalonde))

# Covariates to match on.
X <- cbind(
  lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
  lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
  lalonde$re75, lalonde$re74
)

# Variables to achieve balance on: the covariates plus the re74 * re75
# interaction term.
BalanceMat <- cbind(X, I(lalonde$re74 * lalonde$re75))

# Search for covariate weights; the small pop.size / generation settings keep
# the example fast.
genout <- GenMatch(
  Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
  estimand = "ATE", pop.size = 16, max.generations = 10,
  wait.generations = 1
)

# Here we set ties = FALSE so we only have 1-1 matching.
mout <- Match(
  Y = NULL, Tr = lalonde$treat, X = X,
  Weight.matrix = genout,
  replace = TRUE, ties = FALSE
)
summary(mout)

# Now let's create our "matched dataset".
treated <- lalonde[mout$index.treated, ]
# Introduce an identity variable for each pair: the ID of the treated row.
treated$Pair_ID <- treated$ID
non.treated <- lalonde[mout$index.control, ]
# Row k of non.treated is labelled with the ID of row k of treated.
non.treated$Pair_ID <- treated$ID
matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data$Pair_ID), ]

# This outputs which non-treated ID was paired with the first person (ID 1).
matched.data$ID[matched.data$Pair_ID == 1 & matched.data$treat == 0]
We see that, for this data, ID=1 is matched with ID=193.
Now let's introduce some randomisation into the order of the data and see whether we get the same pairs.
n <- 500

# One result vector per tracked pair (Pair_ID 1 to 7); slot i is filled by
# iteration i of the loop below.
P1 <- P2 <- P3 <- P4 <- P5 <- P6 <- P7 <- rep(NA, n)

for (i in seq_len(n)) {
  # Randomise the row order; lalonde is overwritten, so shuffles accumulate.
  lalonde <- lalonde[sample(1:nrow(lalonde)), ]

  # X and BalanceMat are reused exactly as they were built before the loop;
  # they are not rebuilt from the reshuffled lalonde.
  genout <- GenMatch(
    Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
    estimand = "ATE", pop.size = 16, max.generations = 10,
    wait.generations = 1
  )
  mout <- Match(
    Y = NULL, Tr = lalonde$treat, X = X,
    Weight.matrix = genout,
    replace = TRUE, ties = FALSE
  )
  summary(mout)

  # Build the matched dataset, labelling each pair with the treated row's ID.
  treated <- lalonde[mout$index.treated, ]
  treated$Pair_ID <- treated$ID
  non.treated <- lalonde[mout$index.control, ]
  non.treated$Pair_ID <- treated$ID
  matched.data <- rbind(treated, non.treated)
  matched.data <- matched.data[order(matched.data$Pair_ID), ]

  # Record the non-treated ID paired with each of the first seven IDs.
  is_control <- matched.data$treat == 0
  P1[i] <- matched.data$ID[matched.data$Pair_ID == 1 & is_control]
  P2[i] <- matched.data$ID[matched.data$Pair_ID == 2 & is_control]
  P3[i] <- matched.data$ID[matched.data$Pair_ID == 3 & is_control]
  P4[i] <- matched.data$ID[matched.data$Pair_ID == 4 & is_control]
  P5[i] <- matched.data$ID[matched.data$Pair_ID == 5 & is_control]
  P6[i] <- matched.data$ID[matched.data$Pair_ID == 6 & is_control]
  P7[i] <- matched.data$ID[matched.data$Pair_ID == 7 & is_control]
}
So the loop will match the pairs 500 times, and P1 will save the ID of the treat==0 case paired with ID=1 each time (P2 to P7 do the same for IDs 2 to 7).
We then look at which P1 value appears the most, by:
# Plot the non-treated ID recorded in P1 against the iteration number.
plot(1:n, P1, main="P1")
OR
# Count how many times each ID occurs in P1.
summary(as.factor(P1))
We see that no single treat==0 case is commonly paired.
I would expect there to be a case (possibly ID=193?) that is commonly paired regardless of the order of the data. Therefore I think my loop is wrong. Can anybody point out where? Or, when you run the loop yourself, do you find that similar cases are paired independently of the order of the data?
回答1:
The problem is that you randomise the order of lalonde, but your inputs to GenMatch and Match are X and BalanceMat, which still have the original order. When you then build your matched.data at the end, you are subsetting using indices which don't tie into lalonde any more. Try again, but include the assignment of X and BalanceMat in your loop, i.e.:
# Place these assignments inside the loop, after lalonde has been reshuffled
# and before the GenMatch/Match calls, so the matrices follow the current row
# order of lalonde.
X <- cbind(
  lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
  lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
  lalonde$re75, lalonde$re74
)
# Balance matrix: the same covariates plus the re74 * re75 interaction term.
BalanceMat <- cbind(X, I(lalonde$re74 * lalonde$re75))
来源:https://stackoverflow.com/questions/30957536/consistent-matched-pairs-in-r