R: Quickest way to create dataframe with an alternative to IFELSE

前端 未结 1 1758
谎友^
谎友^ 2021-01-25 04:42

I have a question similar to the one in this thread: Using R, replace all values in a matrix <0.1 with 0?

But in my case I have a hypothetically larger dataset and va

相关标签:
1条回答
  • 2021-01-25 05:46

    One option is data.table

    library(data.table)

    # Build the new column names in the same layout as the original ifelse()
    # loop: IND<i>a, IND<i>b for each i in seqIndividuals (IND1a, IND1b, ...).
    # NOTE(review): seqIndividuals is assumed to be defined by the caller.
    nm1 <- paste0("IND", rep(seqIndividuals, each = 2),
                  rep(letters[1:2], length(seqIndividuals)))

    # Convert to a data.table by reference so `:=` can add columns in place.
    setDT(alFrequ)
    for (j in seq_along(nm1)) {
      # Fill the new column with A2 for every row ...
      alFrequ[, (nm1[j]) := A2]
      # ... then overwrite with A1 on rows where a fresh uniform draw is
      # below that row's MAF.
      alFrequ[runif(.N, 0, 1) < MAF, (nm1[j]) := A1]
    }
    

    Benchmarks

    # Reproducible benchmark data: one row per SNP with two alleles and a MAF.
    set.seed(24)
    n_snp <- 340000
    alleles <- c("G", "T", "A", "C")
    alFrequ <- data.frame(
      SNP = paste0("rs", sample(600000, n_snp, replace = FALSE)),
      A1 = sample(alleles, n_snp, replace = TRUE),
      A2 = sample(alleles, n_snp, replace = TRUE),
      MAF = runif(n_snp, 0, 1),
      stringsAsFactors = FALSE
    )

    # Column names in the same layout as the original ifelse() loop:
    # IND<i>a, IND<i>b for each i in seqIndividuals (IND1a, IND1b, ...).
    # NOTE(review): seqIndividuals is assumed to be defined by the caller.
    nm1 <- paste0("IND", rep(seqIndividuals, each = 2),
                  rep(letters[1:2], length(seqIndividuals)))
    
    # Time the data.table approach: one new column per name in nm1, added by
    # reference.
    system.time({
      setDT(alFrequ)
      for (col_idx in seq_along(nm1)) {
        # Start from A2 everywhere, then switch to A1 where a uniform draw
        # falls below MAF.
        alFrequ[, (nm1[col_idx]) := A2]
        alFrequ[runif(.N, 0, 1) < MAF, (nm1[col_idx]) := A1]
      }
    })
    #   user  system elapsed 
    #  10.72    1.05   11.76 
    

    and using the OP's code on the original dataset

    # Time the original ifelse()-based loop.
    # NOTE(review): presumably run on the plain data.frame, before setDT().
    system.time({
      for (ind in seqIndividuals) {
        for (allele in c("a", "b")) {
          # One fresh uniform draw per SNP: A1 when the draw is below MAF,
          # otherwise A2.
          draws <- runif(length(alFrequ$SNP), 0.00, 1.00)
          alFrequ[paste0("IND", ind, allele)] <- ifelse(draws < alFrequ$MAF,
                                                        alFrequ$A1, alFrequ$A2)
        }
      }
    })
    #    user  system elapsed 
    #   72.16    6.82   79.33 
    
    0 讨论(0)
提交回复
热议问题