Extract last non-missing value in row with data.table

前端 未结 5 851
囚心锁ツ
囚心锁ツ 2021-01-04 04:03

I have a data.table of factor columns, and I want to pull out the label of the last non-missing value in each row. It's kind of a typical max.col situation, but…

5条回答
  •  花落未央
    2021-01-04 04:58

    Another idea, similar to Frank's, that tries (1) to avoid subsetting 'data.table' rows (which I assume must have some cost) and (2) to avoid checking a vector of length nrow(dat) for NAs in every iteration.

    # Return, for every row, the last non-missing value found across the
    # columns of `x`, as a character vector. Rows that are NA in every column
    # stay NA (the recursive version failed on them, because it indexed
    # `x[[0]]` once all columns had been consumed).
    #
    # x   : list of columns, e.g. as.list() of a data.table; the row count is
    #       taken from the first column, so columns are assumed to share one
    #       length. Each column is converted with as.character(), so factor
    #       columns contribute their labels.
    # ans : result accumulator; defaults to an all-NA vector, one entry per row.
    # wh  : indices of the rows that are still unresolved; defaults to all rows.
    #
    # Columns are scanned from last to first. Only the rows still listed in
    # `wh` are read from each column, and only those rows are re-checked for
    # NA, so the amount of work shrinks as rows get resolved.
    alex = function(x, ans = rep_len(NA, length(x[[1L]])), wh = seq_len(length(x[[1L]])))
    {
        for(j in rev(seq_along(x))) {
            # every row already has a value: no need to look at earlier columns
            if(!length(wh)) break
            ans[wh] = as.character(x[[j]])[wh]
            # keep only the rows that are still NA after consulting column j
            wh = wh[is.na(ans[wh])]
        }
        ans
    }
    # Example call; `dat` is presumably the example table from the question
    # (not shown here). It is converted to a plain list before being passed in.
    alex(as.list(dat)) #had some trouble with 'data.table' subsetting
    # [1] "u" "q" "w" "h" "r" "t" "e" "t"
    

    And to compare with Frank's:

    # Frank's approach: add a character column `res` to `x` by reference, then
    # visit the other columns from last to first, copying each column's values
    # into the rows where `res` is still NA. Returns the filled `res` vector.
    # Side effect: `x` keeps the added `res` column after the call.
    frank = function(x)
    {
        x[, res := NA_character_]
        # every column except the last one (normally the `res` just appended),
        # in reverse order
        cols = rev(names(x)[-length(x)])
        for(v in cols) {
            x[is.na(res), res := get(v)]
        }
        x$res
    }
    
    # Benchmark data: 100 columns of 3e5 rows each. Column k starts with n
    # leading NAs, where n runs over ceiling(seq(0, 1e4, length.out = 1e2)),
    # followed by randomly sampled lowercase letters. No seed is set, so the
    # letters differ between runs.
    DAT1 = as.data.table(lapply(ceiling(seq(0, 1e4, length.out = 1e2)), 
                         function(n) c(rep(NA, n), sample(letters, 3e5 - n, TRUE))))
    # frank() adds a `res` column to its argument by reference, so it gets its
    # own copy of the data.
    DAT2 = copy(DAT1)
    # Time both approaches. `res` is dropped again after each frank() run so
    # that every repetition starts from the same columns.
    microbenchmark::microbenchmark(alex(as.list(DAT1)), 
                                   { frank(DAT2); DAT2[, res := NULL] }, 
                                   times = 30)
    #Unit: milliseconds
    #                                            expr       min        lq    median        uq       max neval
    #                             alex(as.list(DAT1))  102.9767  108.5134  117.6595  133.1849  166.9594    30
    # {     frank(DAT2)     DAT2[, `:=`(res, NULL)] } 1413.3296 1455.1553 1497.3517 1540.8705 1685.0589    30
    # Check that both approaches give the same answer. Note that this call
    # leaves the `res` column on DAT2, since it is not removed afterwards.
    identical(alex(as.list(DAT1)), frank(DAT2))
    #[1] TRUE
    

提交回复
热议问题