I am using R and RStudio for the first time to work with a very large dataset (15 million cases) with many columns of data. To facilitate analysis, I need to search a range of
We can use `mtabulate` from the `qdapTools` package.
# Add one logical indicator column per distinct code found anywhere in `df1`:
# TRUE when that row contains the code in any of its columns.
# NOTE: `df1` must already exist when this runs; the example data is defined
# at the end of this file, so evaluate that definition first.
library(qdapTools)

# Transposing turns each row of `df1` into its own column, so mtabulate()
# produces one row of per-code counts for every original row; `!= 0` converts
# those counts into presence flags.
res <- cbind(df1, mtabulate(as.data.frame(t(df1))) != 0)
row.names(res) <- NULL

# Prefix only the newly added indicator columns. Deriving their positions from
# ncol(df1) instead of a hard-coded 1:3 keeps this correct for any number of
# input columns.
is_indicator <- seq_along(res) > ncol(df1)
names(res)[is_indicator] <- paste0("Var", names(res)[is_indicator])
res
# Dx1 Dx2 Dx3 Var001 Var234 Var456 Var231 Var444 Var245 Var777
#1 001 234 456 TRUE TRUE TRUE FALSE FALSE FALSE FALSE
#2 231 001 444 TRUE FALSE FALSE TRUE TRUE FALSE FALSE
#3 245 777 001 TRUE FALSE FALSE FALSE FALSE TRUE TRUE
# Example input data: three code columns (Dx1-Dx3) held as character strings
# such as "001", one row per case, with automatic row names.
df1 <- data.frame(
  Dx1 = c("001", "231", "245"),
  Dx2 = c("234", "001", "777"),
  Dx3 = c("456", "444", "001"),
  stringsAsFactors = FALSE
)