This document serves as an overview for measuring the performance of
RcppAlgos against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: How to generate
permutations or combinations of object in R? has some benchmarks.
You will note that the examples in that post are relatively small. The
benchmarks below will focus on larger examples where performance really
matters and for this reason we only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a
2022 Macbook Air Apple M2 24 GB machine.
library(RcppAlgos)
library(partitions)
library(arrangements)
#> 
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#> 
#>     compositions
library(microbenchmark)
options(digits = 4)
options(width = 90)
pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.3.1 (2023-06-16)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.4.1
pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.8.0'
#> 
#> $arrangements
#> [1] '1.1.9'
#> 
#> $partitions
#> [1] '1.10.7'
#> 
#> $microbenchmark
#> [1] '1.4.10'
numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150       21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v1, m),
               cbArrangements = combinations(v1, m),
               times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15 a  
#>  cbRcppAlgosSer 3.440 3.091 3.099  3.089 3.077 2.982    15  b 
#>  cbArrangements 3.493 3.141 3.163  3.134 3.147 3.117    15   cv2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
               cbArrangements = combinations(v2, m, replace = TRUE),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000   1.00 1.000 1.000    15  a 
#>  cbRcppAlgosSer 2.925 3.018 3.059   3.01 3.000 3.873    15   b
#>  cbArrangements 3.055 3.052 3.018   3.04 3.012 2.883    15   bmyFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
               cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10 a  
#>  cbRcppAlgosSer 3.184 3.156 3.148  3.162 3.114 3.121    10  b 
#>  cbArrangements 6.014 6.059 6.030  6.044 5.947 6.173    10   cv4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720       6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v4, 6),
               cbArrangements = permutations(v4, 6),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15 a  
#>  cbRcppAlgosSer 1.491 1.494 1.428  1.500 1.283 1.350    15  b 
#>  cbArrangements 2.563 2.583 2.421  2.583 2.310 2.117    15   c
## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)
dim(t1)
#> [1] 39916800       11
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
               cbRcppAlgosSer = permuteGeneral(11),
               cbArrangements = permutations(11),
               cbPartitions   = perms(11),
               times = 5, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval  cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000     5 a   
#>  cbRcppAlgosSer 2.502 2.493 2.173  2.498 1.756 1.921     5  b  
#>  cbArrangements 4.091 4.094 3.545  4.092 2.990 2.973     5   c 
#>    cbPartitions 7.997 8.175 7.188  8.288 6.004 6.380     5    dv5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
               cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10 a  
#>  cbRcppAlgosSer 2.867 2.836 2.956  2.785 2.766 4.196    10  b 
#>  cbArrangements 3.567 3.508 3.629  3.464 3.448 4.836    10   cv6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760        7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
               cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min   lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.00 1.000  1.000 1.000 1.000    10 a  
#>  cbRcppAlgosSer 3.157 3.22 3.000  3.214 3.210 1.906    10  b 
#>  cbArrangements 3.601 3.58 3.339  3.572 3.564 2.109    10   ct1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
                    all(rowSums(t1) == 140), all(rowSums(t2) == 140),
                    all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150      16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
               cbArrangements = partitions(140, distinct = TRUE),
               cbPartitions   = diffparts(140),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min    lq   mean median     uq    max neval  cld
#>  cbRcppAlgosPar  1.000  1.00  1.000  1.000  1.000  1.000    10 a   
#>  cbRcppAlgosSer  3.283  3.30  3.017  3.551  2.602  2.492    10  b  
#>  cbArrangements  2.629  2.61  2.355  2.605  2.106  1.984    10   c 
#>    cbPartitions 17.812 18.44 16.317 19.205 14.092 13.282    10    dt1 <- comboGeneral(160, 10,
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(160, 10),
               cbArrangements = partitions(160, 10, distinct = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10 a  
#>  cbRcppAlgosSer 3.348 3.344 2.966  3.260 2.233 2.552    10  b 
#>  cbArrangements 4.384 4.412 3.785  4.284 2.928 2.927    10   ct1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 65), all(rowSums(t2) == 65),
          all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558      65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
               cbArrangements = partitions(65),
               cbPartitions   = parts(65),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median     uq    max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000  1.000 1.0000    20 a  
#>  cbRcppAlgosSer 2.890 2.635 2.317  2.584  2.441 0.8822    20  b 
#>  cbArrangements 2.330 2.046 1.897  2.008  1.912 1.2706    20  b 
#>    cbPartitions 9.132 9.167 9.273 11.193 10.536 3.7732    20   ct1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212      15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
               cbArrangements = partitions(100, 15),
               cbPartitions   = restrictedparts(100, 15,
                                                include.zero = FALSE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median     uq   max neval  cld
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.000 1.000    10 a   
#>  cbRcppAlgosSer  3.359  3.304  2.546  2.735  2.779 1.227    10  b  
#>  cbArrangements  4.201  4.138  3.153  3.405  3.343 1.504    10   c 
#>    cbPartitions 15.543 15.511 11.818 12.758 12.242 6.145    10    dCurrenlty, RcppAlgos is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos and use this as a reference for future
improvements.
t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 120)
dim(t1)
#> [1] 7340225      10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
               times = 10)
#> Unit: milliseconds
#>         expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgos 281.7 282.6 287.2  284.2 292.5 295.8    10t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 15), all(rowSums(t2) == 15),
          all(colSums(t3) == 15))
dim(t1)
#> [1] 16384    15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
               cbArrangements = arrangements::compositions(15),
               cbPartitions   = partitions::compositions(15),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr    min      lq    mean  median      uq     max neval cld
#>  cbRcppAlgosSer   1.00   1.000   1.000   1.000   1.000   1.000    20  a 
#>  cbArrangements   1.23   1.221   1.199   1.199   1.188   1.108    20  a 
#>    cbPartitions 133.80 148.531 172.109 175.967 192.882 222.542    20   bFor the next two examples, we will exclude the
partitions package for efficiency reasons.
t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
          all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304      23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
               cbArrangements = arrangements::compositions(23),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20 a  
#>  cbRcppAlgosSer 3.337 3.374 3.365  3.383 3.379 3.175    20  b 
#>  cbArrangements 3.730 3.760 3.779  3.776 3.775 4.014    20   ct1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005       10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
               cbArrangements = arrangements::compositions(30, 10),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20 a  
#>  cbRcppAlgosSer 3.005 3.089 3.078  3.094 3.081 3.046    20  b 
#>  cbArrangements 3.153 3.129 3.127  3.132 3.119 3.128    20   cWe will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos. The results are
similar for the rest of the cases not shown.
pkg_arrangements <- function(n, total) {
    a <- icombinations(n, as.integer(n / 2))
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- comboIter(n, as.integer(n / 2))
    for (i in 1:total) a@nextIter()
}
total <- comboCount(18, 9)
total
#> [1] 48620
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(18, total),
               cbArrangements = pkg_arrangements(18, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15  a 
#>  cbArrangements 19.57 19.36 18.79  19.15 19.06 15.26    15   bpkg_arrangements <- function(n, total) {
    a <- ipermutations(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- permuteIter(n)
    for (i in 1:total) a@nextIter()
}
total <- permuteCount(8)
total
#> [1] 40320
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(8, total),
               cbArrangements = pkg_arrangements(8, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15  a 
#>  cbArrangements 19.56 19.44 19.18  19.41 19.13 18.18    15   bpkg_partitions <- function(n, total) {
    a <- firstpart(n)
    for (i in 1:(total - 1)) a <- nextpart(a)
}
pkg_arrangements <- function(n, total) {
    a <- ipartitions(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- partitionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}
total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(40, total),
               cbArrangements = pkg_arrangements(40, total),
               cbPartitions   = pkg_partitions(40, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15 a  
#>  cbArrangements 15.55 15.56 14.85  15.64 14.13 14.34    15  b 
#>    cbPartitions 25.36 25.54 24.22  25.60 23.23 22.02    15   cpkg_partitions <- function(n, total) {
    a <- firstcomposition(n)
    for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}
pkg_arrangements <- function(n, total) {
    a <- icompositions(n)
    for (i in 1:total) a$getnext()
}
pkg_RcppAlgos <- function(n, total) {
    a <- compositionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}
total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384
microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(15, total),
               cbArrangements = pkg_arrangements(15, total),
               cbPartitions   = pkg_partitions(15, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval cld
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15 a  
#>  cbArrangements 14.21 14.10 13.56  14.20 13.84 11.64    15  b 
#>    cbPartitions 46.54 46.16 44.64  46.03 44.68 44.44    15   c