I have the following 'list' in R:
[[1]]
[1] 17336 5246 8597 5246 17878 19701
[[2]]
[1] 19701 37748 18155 5246 8597
[[3]]
[1] 12297 19701 17878 5246 173
Using @SymbolixAU's data:
# Drop everything up to and including the first 5 in each vector.
# Position() gives the index of the first match, or -Inf when there is none;
# negating it hands tail() either a negative count (drop that many leading
# values) or Inf (keep the whole vector).
lapply(lst, function(vec) {
  first_five <- Position(isTRUE, vec == 5, nomatch = -Inf)
  tail(vec, -first_five)
})
#[[1]]
#[1] 1 2 3
#
#[[2]]
#[1] 2 3 4
#
#[[3]]
#numeric(0)
#
#[[4]]
#[1] 6
To explain how this works:
1) The middle `Position` part just returns the negative of the index of the first time a 5 is detected, i.e.:
# Negated index of the first 5 in each vector (Inf when there is no 5).
# vapply() pins the result to exactly one numeric value per list element,
# so the return type does not depend on the input the way sapply()'s does.
vapply(lst, function(x) -Position(isTRUE, x == 5, nomatch = -Inf), numeric(1))
#[1] Inf Inf -3 -2
2) `tail(x, -n)` just removes the first `n` values from a vector. When run with `Inf` instead, nothing is removed, which is why `Inf` is used when no 5 is found.
I think a for-loop would be a reasonable way to go for this problem:
# Value whose first occurrence marks the cut point.
v <- 5L

# Walk the list; for every vector that contains `v`, drop everything up to
# and including its first occurrence. Vectors without `v` are left untouched.
for (idx in seq_along(x)) {
  hit <- match(v, x[[idx]])
  if (!is.na(hit)) {
    x[[idx]] <- x[[idx]][-seq_len(hit)]
  }
}

# Print the trimmed list.
x
## [[1]]
## [1] 17336 5246 8597 5246 17878 19701
##
## [[2]]
## [1] 19701 37748 18155 5246 8597
##
## [[3]]
## [1] 12297 19701 17878 5246 17336 8597 17878
##
## [[4]]
## [1] 17878 37748 19701 37748 12297 8597
##
## [[5]]
## [1] 19701 37748 19701 37748 19701 5246
##
## [[6]]
## [1] 19701 6254 17336 18155 19701 12297
##
## [[7]]
## [1] 19701 17878 18155 17878 18155 19701 8597
##
## [[8]]
## [1] 8597 18155
##
## [[9]]
## [1] 12450 18155 5246 8597 5246 8597
##
## [[10]]
## [1] 18155 4105 6254 17878 12297 5246
##
## [[11]]
## [1] 8597 12297
##
## [[12]]
## [1] 17878 5246 18155 17878 12297 8597
##
## [[13]]
## [1] 8597 18155
##
## [[14]]
## [1] 5246 37748 18155 12450 18155 8597
##
## [[15]]
## [1] 19701 37748 6254 8597 6254 8597 12297
##
## [[16]]
## [1] 19701 17878 4105 37748 18155 19701 12450 12297
##
## [[17]]
## [1] 6254 12450 37748 17878 5246 17878 8597
##
## [[18]]
## [1] 8597 12297 18155 5246 18155 12297
##
## [[19]]
## [1] 4105 37748 17878 5246 12450 5246 12450
##
## [[20]]
## [1] 17878 20467
##
## [[21]]
## [1] 20467
##
## [[22]]
## [1] 12450 37748 12450 17878 12450 12297
##
## [[23]]
## [1] 4105 27697 4105 27697
##
## [[24]]
## [1] 4105 37748 17878 20467 12450 17878 27697
##
## [[25]]
## [1] 5246 27697 5246 17336 17878 5246 12297 20467
##
Data
# Sample input: a list of 25 integer vectors. Only element 23 contains the
# value 5L.
x <- list(
  c(17336L, 5246L, 8597L, 5246L, 17878L, 19701L),
  c(19701L, 37748L, 18155L, 5246L, 8597L),
  c(12297L, 19701L, 17878L, 5246L, 17336L, 8597L, 17878L),
  c(17878L, 37748L, 19701L, 37748L, 12297L, 8597L),
  c(19701L, 37748L, 19701L, 37748L, 19701L, 5246L),
  c(19701L, 6254L, 17336L, 18155L, 19701L, 12297L),
  c(19701L, 17878L, 18155L, 17878L, 18155L, 19701L, 8597L),
  c(8597L, 18155L),
  c(12450L, 18155L, 5246L, 8597L, 5246L, 8597L),
  c(18155L, 4105L, 6254L, 17878L, 12297L, 5246L),
  c(8597L, 12297L),
  c(17878L, 5246L, 18155L, 17878L, 12297L, 8597L),
  c(8597L, 18155L),
  c(5246L, 37748L, 18155L, 12450L, 18155L, 8597L),
  c(19701L, 37748L, 6254L, 8597L, 6254L, 8597L, 12297L),
  c(19701L, 17878L, 4105L, 37748L, 18155L, 19701L, 12450L, 12297L),
  c(6254L, 12450L, 37748L, 17878L, 5246L, 17878L, 8597L),
  c(8597L, 12297L, 18155L, 5246L, 18155L, 12297L),
  c(4105L, 37748L, 17878L, 5246L, 12450L, 5246L, 12450L),
  c(17878L, 20467L),
  c(20467L),
  c(12450L, 37748L, 12450L, 17878L, 12450L, 12297L),
  c(6254L, 17878L, 12450L, 12297L, 5L, 4105L, 27697L, 4105L, 27697L),
  c(4105L, 37748L, 17878L, 20467L, 12450L, 17878L, 27697L),
  c(5246L, 27697L, 5246L, 17336L, 17878L, 5246L, 12297L, 20467L)
)
One thought is to identify the elements of the list that contain your value, then subset the list and remove the required elements of the vector in those list elements.
This can be done in a single `lapply` statement using `if {} else {}`:
# Example data: vectors 3 and 4 contain a 5, vectors 1 and 2 do not.
lst <- list(
  c(1, 2, 3),
  c(2, 3, 4),
  c(3, 4, 5),
  c(4, 5, 6)
)

# For each vector, drop everything up to and including the first 5;
# vectors without a 5 are returned unchanged.
lst <- lapply(lst, function(x) {
  # match() yields only the first position (NA when absent), so a vector
  # holding several 5s never feeds a length > 1 value to the range operator.
  first_five <- match(5, x)
  if (is.na(first_five)) {
    x
  } else {
    x[-seq_len(first_five)]
  }
})
# lst
# [[1]]
# [1] 1 2 3
#
# [[2]]
# [1] 2 3 4
#
# [[3]]
# numeric(0)
#
# [[4]]
# [1] 6
It appears the `for`-loop performs better:
library(microbenchmark)

# Target value, assigned once up front so that no timed expression depends on
# another expression having run first.
v <- 5L

# Benchmark the four approaches on the same input list `x`.
# microbenchmark() evaluates its expressions in the calling environment, so
# the two loop-based approaches work on a copy `y`; otherwise the first run
# would strip the 5 out of `x` and every later run (of any approach) would be
# timed on already-trimmed data.
# NOTE: any previously recorded timings were taken with a different harness;
# re-run to refresh them.
microbenchmark(
  Symbolix = {
    lapply(x, function(x) {
      if (any(x %in% 5L)) {
        x[-c(1L:which(x == 5L))]
      } else {
        x
      }
    })
  },
  bgoldst = {
    y <- x
    for (li in seq_along(y)) {
      if (!is.na(vi <- match(v, y[[li]]))) {
        y[[li]] <- y[[li]][-1:-vi]
      }
    }
  },
  thelatemail = {
    # Timed on `x` like the others (it previously ran on the small `lst`).
    lapply(x, function(x) tail(x, -Position(isTRUE, x == 5, nomatch = -Inf)))
  },
  Jota = {
    y <- x
    for (li in seq_along(y)) {
      if (any(y[[li]] == v, na.rm = TRUE)) {
        y[[li]] <- y[[li]][-1:-match(v, y[[li]])]
      }
    }
  }
)
# Unit: microseconds
# expr min lq mean median uq max neval
# Symbolix 55.082 62.1750 70.93372 67.8025 73.4365 159.070 100
# bgoldst 27.186 30.9555 35.67991 32.0970 39.3185 100.706 100
# thelatemail 59.665 65.9235 76.64469 72.1920 78.4580 195.755 100
# Jota 11.797 13.8760 17.74330 14.9370 21.7510 43.929 100