identifying unique elements in sub lists and altering

后端 未结 3 1161
后悔当初
后悔当初 2021-01-21 12:32

I have the following 'list' in R:

[[1]]
[1] 17336  5246  8597  5246 17878 19701

[[2]]
[1] 19701 37748 18155  5246  8597

[[3]]
[1] 12297 19701 17878  5246 173         


        
相关标签:
3条回答
  • 2021-01-21 12:54

    Using @SymbolixAU's data:

    # For every vector in `lst`, drop everything up to and including the
    # first 5; vectors that contain no 5 are returned unchanged.
    lapply(lst, function(vec) {
        # Index of the first element equal to 5, or -Inf when there is none.
        first_hit <- Position(isTRUE, vec == 5, nomatch = -Inf)
        # A negative n makes tail() drop that many leading values;
        # n = Inf (the no-match case) keeps the whole vector.
        tail(vec, n = -first_hit)
    })
    #[[1]]
    #[1] 1 2 3
    # 
    #[[2]]
    #[1] 2 3 4
    #
    #[[3]]
    #numeric(0)
    # 
    #[[4]]
    #[1] 6
    

    To explain how this works:
    1) The inner Position() call returns the index of the first 5 (or -Inf if there is none); the leading minus sign then negates it, i.e.:

    # Show the value handed to tail() for each vector: the negated index of
    # the first 5, or Inf when the vector contains no 5.
    # vapply() is used instead of sapply() so the result is always a numeric
    # vector, even for an empty list.
    vapply(
        lst,
        function(x) -Position(isTRUE, x == 5, nomatch = -Inf),
        numeric(1)
    )
    #[1] Inf Inf  -3  -2
    

    2) tail(x, -n) removes the first n values from a vector. When the second argument is Inf instead, the whole vector is kept and nothing is removed, which is why Inf is used when no 5 is found.

    0 讨论(0)
  • 2021-01-21 12:58

    I think a for-loop would be a reasonable way to go for this problem:

    # Value to search for in each list element.
    v <- 5L
    # Walk the list by index so that elements can be replaced in place.
    for (idx in seq_along(x)) {
        # Position of the first occurrence of `v`, or NA if it is absent.
        first_pos <- match(v, x[[idx]])
        if (is.na(first_pos)) {
            next
        }
        # Drop everything up to and including that first occurrence.
        x[[idx]] <- x[[idx]][-seq_len(first_pos)]
    }
    # Print the modified list.
    x
    ## [[1]]
    ## [1] 17336  5246  8597  5246 17878 19701
    ##
    ## [[2]]
    ## [1] 19701 37748 18155  5246  8597
    ##
    ## [[3]]
    ## [1] 12297 19701 17878  5246 17336  8597 17878
    ##
    ## [[4]]
    ## [1] 17878 37748 19701 37748 12297  8597
    ##
    ## [[5]]
    ## [1] 19701 37748 19701 37748 19701  5246
    ##
    ## [[6]]
    ## [1] 19701  6254 17336 18155 19701 12297
    ##
    ## [[7]]
    ## [1] 19701 17878 18155 17878 18155 19701  8597
    ##
    ## [[8]]
    ## [1]  8597 18155
    ##
    ## [[9]]
    ## [1] 12450 18155  5246  8597  5246  8597
    ##
    ## [[10]]
    ## [1] 18155  4105  6254 17878 12297  5246
    ##
    ## [[11]]
    ## [1]  8597 12297
    ##
    ## [[12]]
    ## [1] 17878  5246 18155 17878 12297  8597
    ##
    ## [[13]]
    ## [1]  8597 18155
    ##
    ## [[14]]
    ## [1]  5246 37748 18155 12450 18155  8597
    ##
    ## [[15]]
    ## [1] 19701 37748  6254  8597  6254  8597 12297
    ##
    ## [[16]]
    ## [1] 19701 17878  4105 37748 18155 19701 12450 12297
    ##
    ## [[17]]
    ## [1]  6254 12450 37748 17878  5246 17878  8597
    ##
    ## [[18]]
    ## [1]  8597 12297 18155  5246 18155 12297
    ##
    ## [[19]]
    ## [1]  4105 37748 17878  5246 12450  5246 12450
    ##
    ## [[20]]
    ## [1] 17878 20467
    ##
    ## [[21]]
    ## [1] 20467
    ##
    ## [[22]]
    ## [1] 12450 37748 12450 17878 12450 12297
    ##
    ## [[23]]
    ## [1]  4105 27697  4105 27697
    ##
    ## [[24]]
    ## [1]  4105 37748 17878 20467 12450 17878 27697
    ##
    ## [[25]]
    ## [1]  5246 27697  5246 17336 17878  5246 12297 20467
    ##
    

    Data

    # Example data: a list of 25 integer vectors. Only element 23 contains
    # the value 5L.
    x <- list(
        c(17336L, 5246L, 8597L, 5246L, 17878L, 19701L),
        c(19701L, 37748L, 18155L, 5246L, 8597L),
        c(12297L, 19701L, 17878L, 5246L, 17336L, 8597L, 17878L),
        c(17878L, 37748L, 19701L, 37748L, 12297L, 8597L),
        c(19701L, 37748L, 19701L, 37748L, 19701L, 5246L),
        c(19701L, 6254L, 17336L, 18155L, 19701L, 12297L),
        c(19701L, 17878L, 18155L, 17878L, 18155L, 19701L, 8597L),
        c(8597L, 18155L),
        c(12450L, 18155L, 5246L, 8597L, 5246L, 8597L),
        c(18155L, 4105L, 6254L, 17878L, 12297L, 5246L),
        c(8597L, 12297L),
        c(17878L, 5246L, 18155L, 17878L, 12297L, 8597L),
        c(8597L, 18155L),
        c(5246L, 37748L, 18155L, 12450L, 18155L, 8597L),
        c(19701L, 37748L, 6254L, 8597L, 6254L, 8597L, 12297L),
        c(19701L, 17878L, 4105L, 37748L, 18155L, 19701L, 12450L, 12297L),
        c(6254L, 12450L, 37748L, 17878L, 5246L, 17878L, 8597L),
        c(8597L, 12297L, 18155L, 5246L, 18155L, 12297L),
        c(4105L, 37748L, 17878L, 5246L, 12450L, 5246L, 12450L),
        c(17878L, 20467L),
        c(20467L),
        c(12450L, 37748L, 12450L, 17878L, 12450L, 12297L),
        c(6254L, 17878L, 12450L, 12297L, 5L, 4105L, 27697L, 4105L, 27697L),
        c(4105L, 37748L, 17878L, 20467L, 12450L, 17878L, 27697L),
        c(5246L, 27697L, 5246L, 17336L, 17878L, 5246L, 12297L, 20467L)
    )
    
    0 讨论(0)
  • 2021-01-21 13:01

    One thought is to identify the elements of the list that contain your value, then subset the list and remove the required elements of the vector in those list elements.

    This can be done in a single lapply statement using if{}else{}

    # Small example list: the last two vectors contain a 5.
    lst <- list(c(1, 2, 3),
                c(2, 3, 4),
                c(3, 4, 5),
                c(4, 5, 6))

    # Drop everything up to and including the first occurrence of `value`.
    #
    # x:     an atomic vector.
    # value: the value to look for (default 5).
    # Returns `x` unchanged when `value` does not occur in it; otherwise the
    # elements of `x` that follow the first occurrence (possibly length 0).
    drop_through_first <- function(x, value = 5) {
        # match() yields only the FIRST position (or NA), so a vector holding
        # several 5s is handled correctly; `1:which(x == 5)` would pass
        # multiple indices to `:` and warn that only the first is used.
        first_pos <- match(value, x)
        if (is.na(first_pos)) {
            x
        } else {
            x[-seq_len(first_pos)]
        }
    }

    lst <- lapply(lst, drop_through_first)
    
    # lst
    # [[1]]
    # [1] 1 2 3
    # 
    # [[2]]
    # [1] 2 3 4
    # 
    # [[3]]
    # numeric(0)
    # 
    # [[4]]
    # [1] 6
    

    In this benchmark, the for-loop approaches appear to perform better:

    # Time the four approaches for dropping everything up to and including
    # the first 5 in each list element.
    # NOTE(review): the entries do not all run on the same input (see the
    # notes on each entry), so treat the timings as a rough guide only.
    library(microbenchmark)
    microbenchmark(
        # lapply() with if/else on `x`; the result is discarded and `x` is
        # not reassigned.
        Symbolix = {
            lapply(x, function(x){
                if(any(x %in% 5L)){
                    x[-c(1L:which(x == 5L))]
                }else{
                    x
                }
            })
        },
        # for-loop using match(); assigns `v` and overwrites x[[li]].
        # NOTE(review): if these assignments persist between evaluations,
        # only the first run actually removes anything -- confirm against
        # how microbenchmark evaluates its expressions.
        bgoldst = {
            v <- 5L;
            for (li in seq_along(x))
                if (!is.na(vi <- match(v,x[[li]])))
                    x[[li]] <- x[[li]][-1:-vi];
        },
        # NOTE(review): this entry runs on `lst`, not on `x` like the others.
        thelatemail = {
            lapply(lst, function(x) tail(x, -Position(isTRUE, x==5, nomatch=-Inf)) )
        },
        # for-loop using any() + match(); overwrites x[[li]].
        # NOTE(review): reads `v`, which is not assigned in this expression
        # (it is assigned in the `bgoldst` entry above).
        Jota = {for (li in seq_along(x)) if (any(x[[li]] == v, na.rm = TRUE)) x[[li]] <- x[[li]][-1:-match(v,x[[li]])];}
    )
    
    # Unit: microseconds
    #        expr    min      lq     mean  median      uq     max neval
    #    Symbolix 55.082 62.1750 70.93372 67.8025 73.4365 159.070   100
    #     bgoldst 27.186 30.9555 35.67991 32.0970 39.3185 100.706   100
    # thelatemail 59.665 65.9235 76.64469 72.1920 78.4580 195.755   100
    #        Jota 11.797 13.8760 17.74330 14.9370 21.7510  43.929   100
    
    0 讨论(0)
提交回复
热议问题