Consider the following list:
> l1 <- list(NULL,1,2,list(NULL,3,list(NULL,4)))
> str(l1)
List of 4
$ : NULL
$ : num 1
$ : num 2
$ :List of 3
..
This can be done recursively:
## Recursively drop NULL elements from a (possibly nested) list.
## NULLs are removed at the current level first; every remaining element
## that is itself a list is then processed the same way.
rmNull <- function(x) {
  ## logical mask of the entries to keep at this level
  non_null <- x[!vapply(x, is.null, logical(1L))]
  ## descend into sub-lists, pass everything else through untouched
  descend <- function(el) {
    if (is.list(el)) {
      rmNull(el)
    } else {
      el
    }
  }
  lapply(non_null, descend)
}
## apply rmNull to the example list `l1` defined at the top
l2 <- rmNull(l1)
giving:
> str(l2)
List of 3
$ : num 1
$ : num 2
$ :List of 2
..$ : num 3
..$ :List of 1
.. ..$ : num 4
Using an external package, this can now also be done with `rrapply()` in the rrapply package (a revised version of base `rapply()`). Set `how = "prune"` to prune all list elements that do not satisfy the function supplied in the `condition` argument:
library(rrapply)
## example input with NULLs at several nesting levels
l1 <- list(NULL, 1, 2, list(NULL, 3, list(NULL, 4)))
## prune every element for which `condition` does not hold
rrapply(
  l1,
  condition = Negate(is.null),
  how = "prune"
)
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 3
#>
#> [[3]][[2]]
#> [[3]][[2]][[1]]
#> [1] 4
We can benchmark computation times for large list objects against OP's `list.clean` function and G. Grothendieck's `rmNull` function:
## benchmark recursion functions
## Recursively remove NULL elements from a (possibly nested) list.
## NULL entries are dropped at this level, then every remaining element
## that is itself a list is processed the same way. Returns a list.
rmNull <- function(x) {
## keep only the elements for which is.null() is FALSE
x <- Filter(Negate(is.null), x)
## recurse into sub-lists; all other elements pass through unchanged
lapply(x, function(x) if (is.list(x)) rmNull(x) else x)
}
## Remove the elements of `.data` for which `fun` returns TRUE.
##   .data      list to clean
##   fun        predicate applied to each element; vapply() requires it to
##              return a single logical value; defaults to is.null
##   recursive  if TRUE, nested lists are cleaned as well, using the same `fun`
## Returns `.data` with the matching elements removed.
list.clean <- function(.data, fun = is.null, recursive = FALSE) {
if(recursive) {
## clean the sub-lists first, so this level is filtered after its children
.data <- lapply(.data, function(.item) {
if(is.list(.item)) list.clean(.item, fun, TRUE)
else .item
})
}
## assigning NULL through a logical index deletes the selected list elements
.data[vapply(.data,fun,logical(1L))] <- NULL
.data
}
## Recursively create a nested list with `dmax` layers in which 50% of the
## innermost elements are NULL.
##
## Arguments:
##   len   number of elements in every list above the innermost layer
##   d     depth of the current layer (the calls in this file start at 1)
##   dmax  depth at which recursion stops; elements at the last layer
##         are always list(1, NULL)
##
## Returns a list of length `len`.
f <- function(len, d, dmax) {
  ## an empty layer needs no subtree, so do not recurse at all
  if (len == 0) {
    return(list())
  }
  ## every element of a layer is the same subtree, so build it once ...
  child <- if (d + 1 < dmax) f(len, d + 1, dmax) else list(1, NULL)
  ## ... and repeat it, instead of rebuilding it `len` times per layer
  rep(list(child), len)
}
## long shallow list (3 layers, total 5e5 nodes)
## input: 500 sub-lists, each holding 500 `list(1, NULL)` leaves
x_long <- f(len = 500, d = 1, dmax = 3)
## time the three pruning approaches on the same input
microbenchmark::microbenchmark(
rmNull = rmNull(x_long),
list.clean = list.clean(x_long, recursive = TRUE),
rrapply = rrapply(x_long, condition = Negate(is.null), how = "prune"),
## ask microbenchmark to check that all expressions return equal results
## (see its `check` argument)
check = "equal",
## each expression is evaluated 5 times
times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max
#> rmNull 2381.5536 2535.6871 2559.4045 2546.0375 2571.9462 2761.7982
#> list.clean 1954.4046 1973.7983 2012.2158 2010.7334 2049.8020 2072.3409
#> rrapply 288.5784 297.9041 382.3111 301.3147 460.5107 563.2475
## deeply nested list (18 layers, total 2^18 nodes)
## input: 17 nested levels of length-2 lists whose innermost elements are
## `list(1, NULL)`
x_deep <- f(len = 2, d = 1, dmax = 18)
## time the three pruning approaches on the same input
microbenchmark::microbenchmark(
rmNull = rmNull(x_deep),
list.clean = list.clean(x_deep, recursive = TRUE),
rrapply = rrapply(x_deep, condition = Negate(is.null), how = "prune"),
## ask microbenchmark to check that all expressions return equal results
## (see its `check` argument)
check = "equal",
## each expression is evaluated 5 times
times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max
#> rmNull 2306.5788 2360.2663 2422.2578 2367.9296 2530.201 2546.3135
#> list.clean 1708.1192 1829.1303 2014.2162 2157.2148 2180.023 2196.5937
#> rrapply 174.5385 187.9491 271.4967 200.9263 206.739 587.3306