Randomly associate elements of two vectors given conditions

前端 未结 5 761
心在旅途
心在旅途 2021-01-23 18:26

I have a data.table of capitals

capitals<-data.table(capital=c(100,50,25,5))
capitals
   capital
1:     100
2:      50
3:      25
4:       5
         


        
5条回答
  •  无人共我
    2021-01-23 18:30

    Unless I've missed something, here is an approach that looks valid:

    capital = c(100, 50, 25, 5); loss = c(45, 10, 5, 1)
    
    sc = sort(capital)
    sl = sort(loss)
    allowed = lapply(findInterval(sc, sl), seq_len)
    
    replicate(10, {  #just to replicate the process
        tmp = seq_along(loss)
        sams = rep(NA, length(loss))
        for(i in seq_along(allowed)) {
            intsec = intersect(allowed[[i]], tmp)
            s = intsec[sample(length(intsec), 1)]
            tmp[s] = NA
            sams[i] = s
        }
        sl[sams]
    })
    #     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
    #[1,]    1    1    1    5    1    1    1    5    5     1
    #[2,]   10   10    5    1   10   10   10    1    1     5
    #[3,]   45    5   10   45    5   45   45   10   45    45
    #[4,]    5   45   45   10   45    5    5   45   10    10
    

    Each element in each column, above, corresponds to its respective element in "sc" (sorted capital) [5 25 50 100].

    And some benchmarkings comparing with rnso's answer:

    cap2 = sample(100:500, 10); los2 = sample(50:250, 10)  #10 capitals and losses
    microbenchmark::microbenchmark(ffalex(cap2, los2), ffrnso(cap2, los2), times = 5)
    #Unit: microseconds
    #               expr     min      lq  median      uq     max neval
    # ffalex(cap2, los2) 385.589 396.377 399.162 434.309 591.608     5
    # ffrnso(cap2, los2)  14.964  21.577  27.492  42.456  80.389     5
    cap2 = sample(100:500, 50); los2 = sample(50:250, 50)  #50
    microbenchmark::microbenchmark(ffalex(cap2, los2), ffrnso(cap2, los2), times = 5)
    #Unit: milliseconds
    #               expr       min        lq     median          uq         max neval
    # ffalex(cap2, los2)   1.62031   1.64467   1.949522    1.966226    3.508583     5
    # ffrnso(cap2, los2) 283.27681 538.50515 971.273262 3348.542296 4279.280326     5
    cap2 = sample(100:500, 2e2); los2 = sample(50:250, 2e2)  #200
    system.time({ ans1 = ffalex(cap2, los2) })
    #   user  system elapsed 
    #   0.01    0.02    0.03 
    system.time({ ans2 = ffrnso(cap2, los2) })
    #Timing stopped at: 77.69 0.14 78.22
    

    And check that indeed all losses are "<=" to capital :

    #head(ans1)
    #      sc   
    #[1,] 100 83
    #[2,] 101 92
    #[3,] 103 59
    #[4,] 107 52
    #[5,] 109 74
    #[6,] 110 79
    sum(ans1[, 2] > ans1[, 1])
    #[1] 0   #none is greater
    

    The two functions:

    ffalex = function (capital, loss) 
    {
        sc = sort(capital)
        sl = sort(loss)
        allowed = lapply(findInterval(sc, sl), seq_len)
        tmp = seq_along(loss)
        sams = rep(NA, length(loss))
        for (i in seq_along(allowed)) {
            intsec = intersect(allowed[[i]], tmp)
            s = intsec[sample(length(intsec), 1)]
            tmp[s] = NA
            sams[i] = s
        }
        cbind(sc, sl[sams])
    }
    
    ffrnso = function (capital, loss) 
    {
        while (any(loss > capital)) {
            loss <- sample(loss, replace = F)
        }
        cbind(capital, loss)
    }
    

提交回复
热议问题