do.call and order to sort each row to descending order of a matrix?

戏子无情 提交于 2019-12-03 20:28:16

Two alternatives:

# Jaap
do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE))

# adaption of lmo's solution in the comments
for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)]

gives:

   [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
1     6    6    5    4    4    2    2    1    1     1
2     5    4    4    4    4    3    3    3    3     1
3     4    4    4    4    3    3    3    3    3     2
4     7    5    4    4    4    3    2    2    1     0
5     5    4    4    3    3    2    2    1    1     0
6     5    4    3    3    3    2    2    2    1     1
7     6    4    4    4    3    3    2    2    2     1
8     5    5    4    4    3    3    3    2    1     1
9     5    5    4    3    3    2    2    2    2     2
10    6    5    3    3    3    2    2    2    1     1

A benchmark with:

library(microbenchmark)
microbenchmark(dc.lapply.sort = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
               t.apply.sort = t(apply(a, 1, sort, decreasing = TRUE)),
               for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.sort = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE),
               for.sort.list = for(x in seq_len(nrow(a))) a[x,] <- a[x,][sort.list(a[x,], decreasing = TRUE, method="radix")])

gives:

Unit: microseconds
           expr     min       lq      mean   median       uq      max neval cld
 dc.lapply.sort 189.811 206.5890 222.52223 217.8070 228.0905  332.034   100   c
   t.apply.sort 185.474 200.4515 212.59608 210.4930 220.0025  286.288   100  bc
      for.order  82.631  91.1860  98.66552  97.8475 102.9680  176.666   100 a  
       for.sort 167.939 187.5025 192.90728 192.1195 198.8690  256.494   100  b 
  for.sort.list 187.617 206.4475 230.82960 215.7060 221.6115 1541.343   100   c

It should be noted however that benchmarks are only meaningful on larger datasets, so:

set.seed(123)
a <- matrix(rbinom(10e5, 10, 0.3), ncol = 10)

microbenchmark(dc.lapply.sort = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
               t.apply.sort = t(apply(a, 1, sort, decreasing = TRUE)),
               for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.sort = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE),
               for.sort.list = for(x in seq_len(nrow(a))) a[x,] <- a[x,][sort.list(a[x,], decreasing = TRUE, method="radix")],
               times = 10)

gives:

Unit: seconds
           expr      min       lq     mean   median       uq      max neval  cld
 dc.lapply.sort 6.790179 6.924036 7.036330 7.013996 7.121343 7.351729    10    d
   t.apply.sort 5.032052 5.057022 5.151560 5.081459 5.177159 5.538416    10   c 
      for.order 1.368351 1.463285 1.514652 1.471467 1.583873 1.736544    10 a   
       for.sort 5.028314 5.102993 5.317597 5.154104 5.348614 6.123278    10   c 
  for.sort.list 2.417857 2.464817 2.573294 2.519408 2.726118 2.815964    10  b  

Conclusion: the for-loop in combination with order is still the fastest solution.


Using the order2 and sort2 functions of the grr-package can give a further improvement in speed. Comparing them with the fastest solution from above:

set.seed(123)
a <- matrix(rbinom(10e5, 10, 0.3), ncol = 10)

microbenchmark(for.order = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
               for.order2 = for(i in 1:nrow(a)) a[i,] <- a[i,][rev(grr::order2(a[i,]))],
               for.sort2 = for(i in 1:nrow(a)) a[i,] <- rev(grr::sort2(a[i,])),
               times = 10)

giving:

Unit: milliseconds
       expr       min        lq      mean    median        uq      max neval cld
  for.order 1243.8140 1263.4423 1316.4662 1305.1823 1378.5836 1404.251    10   c
 for.order2  956.1536  962.8226 1110.1778 1090.9984 1233.4241 1368.416    10  b 
  for.sort2  830.1887  843.6765  920.5668  847.1601  972.8703 1144.135    10 a  

t(apply(a, 1, sort, decreasing = TRUE)) gives:

#       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#  [1,]    6    6    5    4    4    2    2    1    1     1
#  [2,]    5    4    4    4    4    3    3    3    3     1
#  [3,]    4    4    4    4    3    3    3    3    3     2
#  [4,]    7    5    4    4    4    3    2    2    1     0
#  [5,]    5    4    4    3    3    2    2    1    1     0
#  [6,]    5    4    3    3    3    2    2    2    1     1
#  [7,]    6    4    4    4    3    3    2    2    2     1
#  [8,]    5    5    4    4    3    3    3    2    1     1
#  [9,]    5    5    4    3    3    2    2    2    2     2
# [10,]    6    5    3    3    3    2    2    2    1     1

I did also microbenchmarking and it seems that the order solutions win :)

>     microbenchmark(jaap1 = do.call(rbind, lapply(split(a, row(a)), sort, decreasing = TRUE)),
+                    apom = t(apply(a, 1, sort, decreasing = TRUE)),
+                    jaap2 = for(i in 1:nrow(a)) a[i,] <- a[i,][order(a[i,], decreasing = TRUE)],
+                    jaap3 = for(i in 1:nrow(a)) a[i,] <- sort(a[i,], decreasing = TRUE), 
+                    alpha = t(apply(a, 1, function(x) order(x, decreasing = T))),
+                    times = 1000L)
Unit: microseconds
  expr     min       lq     mean   median       uq      max neval
 jaap1 318.193 364.6125 404.3224 389.5845 417.6405 1422.087  1000
  apom 276.764 340.2740 389.1302 364.9650 398.3680 2854.710  1000
 jaap2 121.332 158.4845 189.5616 182.2070 202.2390 1170.602  1000
 jaap3 247.387 309.2445 351.6959 332.2710 365.3640 1361.720  1000
 alpha 139.244 178.7460 209.6122 202.8580 226.7585 1092.301  1000
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!