I have a question similar to the one in this thread: Using R, replace all values in a matrix <0.1 with 0?
But in my case I have a hypothetically larger dataset and va
One option is to use data.table, which adds the new columns by reference instead of copying the data on every assignment:
library(data.table)
# One column name per simulated allele, two per individual, following the
# same IND<individual><a|b> pattern as the ifelse() loop (IND1a, IND1b, ...).
nm1 <- paste0("IND", rep(seqIndividuals, each = 2),
              rep(letters[1:2], length(seqIndividuals)))
# Convert to a data.table in place so := can add columns by reference.
setDT(alFrequ)
for (ind_col in nm1) {
  # Default every row to A2, then overwrite with A1 on the rows where a fresh
  # uniform draw falls below that row's MAF.
  alFrequ[, (ind_col) := A2]
  alFrequ[runif(.N, 0, 1) < MAF, (ind_col) := A1]
}
# Reproducible benchmark fixture: 340000 SNPs with two random alleles each and
# a uniform minor-allele frequency. SNP ids are drawn without replacement.
set.seed(24)
alFrequ <- local({
  n_snps <- 340000
  bases <- c("G", "T", "A", "C")
  # The draw order (SNP ids, A1, A2, MAF) fixes the values produced by the seed.
  snp_ids <- paste0("rs", sample(600000, n_snps, replace = FALSE))
  allele_1 <- sample(bases, n_snps, replace = TRUE)
  allele_2 <- sample(bases, n_snps, replace = TRUE)
  maf <- runif(n_snps, 0, 1)
  data.frame(SNP = snp_ids, A1 = allele_1, A2 = allele_2, MAF = maf,
             stringsAsFactors = FALSE)
})
# Column names follow the IND<individual><a|b> pattern used by the ifelse()
# version (IND1a, IND1b, IND2a, ...), so both approaches fill the same columns.
nm1 <- paste0("IND", rep(seqIndividuals, each = 2),
              rep(letters[1:2], length(seqIndividuals)))
# Time the data.table approach, including the in-place conversion.
system.time({
  setDT(alFrequ)
  for (ind_col in nm1) {
    # Default to A2, then switch to A1 where a uniform draw is below MAF.
    alFrequ[, (ind_col) := A2]
    alFrequ[runif(.N, 0, 1) < MAF, (ind_col) := A1]
  }
})
# user system elapsed
# 10.72 1.05 11.76
For comparison, here is the timing of the OP's code on the original dataset:
# Time the base-R reference implementation: for every individual, fill two
# allele columns (IND<i>a and IND<i>b). Each row gets A1 when a fresh uniform
# draw is below its MAF, otherwise A2.
system.time({
  for (i in seqIndividuals) {
    # Column "a" is drawn before "b", matching the original statement order.
    for (strand in c("a", "b")) {
      draws <- runif(nrow(alFrequ), 0.00, 1.00)
      alFrequ[paste0("IND", i, strand)] <-
        ifelse(draws < alFrequ$MAF, alFrequ$A1, alFrequ$A2)
    }
  }
})
# user system elapsed
# 72.16 6.82 79.33