Efficiently remove all NULL values in a list and all sublists

前端 未结 2 993
旧巷少年郎
旧巷少年郎 2021-01-17 08:48

Consider the following list:

> l1 <- list(NULL,1,2,list(NULL,3,list(NULL,4)))
> str(l1)
List of 4
 $ : NULL
 $ : num 1
 $ : num 2
 $ :List of 3
  ..$ : NULL
  ..$ : num 3
  ..$ :List of 2
  .. ..$ : NULL
  .. ..$ : num 4


        
相关标签:
2条回答
  • 2021-01-17 09:26

    This can be done recursively:

    ## Recursively strip every NULL entry from a list and from all of its
    ## nested lists. Returns a list; non-list elements are passed through as-is.
    rmNull <- function(x) {
      ## Discard the NULL entries found directly at this level.
      non_null <- Filter(function(el) !is.null(el), x)
      ## Clean nested lists the same way; anything else is returned untouched.
      lapply(non_null, function(el) {
        if (!is.list(el)) {
          return(el)
        }
        rmNull(el)
      })
    }
    ## Apply rmNull to the example list l1 from the question; the cleaned
    ## copy is stored in l2 (l1 itself is not modified).
    l2 <- rmNull(l1)
    

    giving:

    > str(l2)
    List of 3
     $ : num 1
     $ : num 2
     $ :List of 2
      ..$ : num 3
      ..$ :List of 1
      .. ..$ : num 4
    
    0 讨论(0)
  • 2021-01-17 09:28

    Using an external package, this can now also be done with rrapply in the rrapply-package (a revised version of base rapply). Set how = "prune" to prune all list elements that do not satisfy a function defined in the condition argument:

    ## rrapply() comes from the external rrapply package.
    library(rrapply)
    
    ## Example list from the question: NULL entries at three nesting levels.
    l1 <- list(NULL,1,2,list(NULL,3,list(NULL,4)))
    
    ## how = "prune" drops every element that does not satisfy `condition`;
    ## with Negate(is.null) as the condition, only non-NULL elements are kept.
    rrapply(l1, condition = Negate(is.null), how = "prune")
    #> [[1]]
    #> [1] 1
    #> 
    #> [[2]]
    #> [1] 2
    #> 
    #> [[3]]
    #> [[3]][[1]]
    #> [1] 3
    #> 
    #> [[3]][[2]]
    #> [[3]][[2]][[1]]
    #> [1] 4
    

    We can benchmark computation times for large list objects against OP's list.clean function and G. Grothendieck's rmNull function:

    ## benchmark recursion functions
    ## Remove NULL entries from a list at every nesting depth.
    ## Non-list elements are kept unchanged; the result is always a list.
    rmNull <- function(x) {
      ## Flag the NULL entries at this level and keep everything else.
      is_missing <- vapply(x, is.null, logical(1L))
      survivors <- x[!is_missing]
      ## Descend into the remaining sub-lists; leaves are returned as they are.
      lapply(survivors, function(item) {
        if (is.list(item)) {
          rmNull(item)
        } else {
          item
        }
      })
    }
    
    ## Remove the elements of `.data` for which `fun` returns TRUE.
    ##   .data     : list to clean
    ##   fun       : predicate applied to each element; must return a single
    ##               logical value (default: is.null)
    ##   recursive : if TRUE, nested lists are cleaned first, then this level
    ## Returns `.data` with the matching elements deleted.
    list.clean <- function(.data, fun = is.null, recursive = FALSE) {
      ## Cleans one child: nested lists are processed recursively, while
      ## non-list children are handed back unchanged.
      clean_child <- function(child) {
        if (!is.list(child)) {
          return(child)
        }
        list.clean(child, fun, TRUE)
      }
      if (recursive) {
        .data <- lapply(.data, clean_child)
      }
      ## Assigning NULL through a logical mask deletes the flagged elements.
      to_drop <- vapply(.data, fun, logical(1L))
      .data[to_drop] <- NULL
      .data
    }
    
    ## Build a nested test list in which half of the leaf entries are NULL.
    ##   len  : number of elements in every list layer
    ##   d    : depth of the layer being created
    ##   dmax : depth bound; once d + 1 reaches dmax, each element becomes the
    ##          leaf pair list(1, NULL) instead of another nested layer
    ## Returns an unnamed list of length `len`.
    f <- function(len, d, dmax) {
      build_layer <- function(depth) {
        slots <- vector(mode = "list", length = len)
        lapply(seq_along(slots), function(i) {
          if (depth + 1 < dmax) {
            build_layer(depth + 1)
          } else {
            list(1, NULL)
          }
        })
      }
      build_layer(d)
    }
    
    ## long shallow list (3 layers, total 5e5 nodes):
    ## 500 elements, each holding 500 list(1, NULL) pairs, i.e. 500 * 500 * 2.
    x_long <- f(len = 500, d = 1, dmax = 3)
    
    ## Time the three NULL-removal approaches on the same input.
    microbenchmark::microbenchmark(
      rmNull = rmNull(x_long),
      list.clean = list.clean(x_long, recursive = TRUE),
      rrapply = rrapply(x_long, condition = Negate(is.null), how = "prune"),
      ## per microbenchmark's `check` option, "equal" compares the results of
      ## all expressions, so the timings refer to equal outputs
      check = "equal",
      ## evaluate each expression 5 times
      times = 5L
    )
    #> Unit: milliseconds
    #>        expr       min        lq      mean    median        uq       max
    #>      rmNull 2381.5536 2535.6871 2559.4045 2546.0375 2571.9462 2761.7982
    #>  list.clean 1954.4046 1973.7983 2012.2158 2010.7334 2049.8020 2072.3409
    #>     rrapply  288.5784  297.9041  382.3111  301.3147  460.5107  563.2475
    
    ## deeply nested list (18 layers, total 2^18 nodes):
    ## every layer has 2 elements and the bottom layer holds list(1, NULL) pairs.
    x_deep <- f(len = 2, d = 1, dmax = 18)
    
    ## Time the three NULL-removal approaches on the same input.
    microbenchmark::microbenchmark(
      rmNull = rmNull(x_deep),
      list.clean = list.clean(x_deep, recursive = TRUE),
      rrapply = rrapply(x_deep, condition = Negate(is.null), how = "prune"),
      ## per microbenchmark's `check` option, "equal" compares the results of
      ## all expressions, so the timings refer to equal outputs
      check = "equal",
      ## evaluate each expression 5 times
      times = 5L
    )
    #> Unit: milliseconds
    #>        expr       min        lq      mean    median       uq       max
    #>      rmNull 2306.5788 2360.2663 2422.2578 2367.9296 2530.201 2546.3135
    #>  list.clean 1708.1192 1829.1303 2014.2162 2157.2148 2180.023 2196.5937
    #>     rrapply  174.5385  187.9491  271.4967  200.9263  206.739  587.3306
    
    0 讨论(0)
提交回复
热议问题