Benchmarks

Setup

library(rstruct)
library(r2r)
library(hash)
library(microbenchmark)
library(fastmap)

library(ggplot2)

set.seed(42)

# Benchmarks

N <- 1e4

chars <- c(letters, LETTERS, 0:9)
random_keys <- function(n) paste0(
  sample(chars, n, replace = TRUE),
  sample(chars, n, replace = TRUE),
  sample(chars, n, replace = TRUE),
  sample(chars, n, replace = TRUE),
  sample(chars, n, replace = TRUE)
)

set.seed(840)
keys <- random_keys(N)
values <- rnorm(N)

# Hash maps

m_rstruct <- rstruct::hashmap(as.list(setNames(values, keys)))

m_r2r <- r2r::hashmap()
m_r2r[keys] <- values

m_hash <- hash::hash()
m_hash[keys] <- values
m_fastmap <- fastmap::fastmap()

for (i in seq_along(keys)) {
  m_fastmap$set(keys[i], values[i])
}

# Hash sets (only rstruct & r2r)

hs_rstruct <- rstruct::hashset(keys)
hs_r2r <- r2r::hashset(keys)

Hash Maps

Random individual lookup

Slightly slower but overall similar to r2r & hash (it's hard to beat base R due to overhead introduced by interfacing between R & C++):

random_lookup <- microbenchmark(
  `rstruct` = {
    m_rstruct[[sample(keys, 1)]]
  },
  `r2r` = {
    m_r2r[[sample(keys, 1)]]
  },
  `hash` = {
    m_hash[[sample(keys, 1)]]
  },
  `fastmap` = {
    m_fastmap$get(sample(keys, 1))
  },
  times = 1e3
)

random_lookup
#> Unit: microseconds
#>     expr    min      lq     mean  median      uq     max neval cld
#>  rstruct 30.315 36.0995 41.66674 38.7310 41.6125 856.104  1000 a  
#>      r2r 27.720 32.8780 37.53605 35.1035 37.5250 307.261  1000  b 
#>     hash 14.748 18.4100 22.27964 20.7575 22.5665 328.557  1000   c
#>  fastmap 13.818 16.8320 20.49452 19.3845 21.4940 140.817  1000   c

Bulk retrieval

This is what rstruct is optimized for; here it outperforms the other packages by orders of magnitude.

bulk_lookup <- microbenchmark(
  `rstruct` = {
    m_rstruct[keys]
  },
  `r2r` = {
    m_r2r[keys]
  },
  `hash` = {
    m_hash[keys]
  },
  `fastmap` = {
    m_fastmap$mget(keys)
  },
  times = 50
)

bulk_lookup
#> Unit: microseconds
#>     expr        min         lq        mean      median         uq        max neval  cld
#>  rstruct    519.882    670.962    777.4081    711.8475    774.145   1616.488    50 a   
#>      r2r 151023.863 186794.500 211503.9563 204688.5570 222825.869 341436.095    50  b  
#>     hash 105025.494 137638.709 156059.0240 152346.3065 168229.644 232997.247    50   c 
#>  fastmap  23327.795  28004.905  34033.9940  30370.3995  36015.858  78538.938    50    d

Hash sets

hash has no implementation, so we only test rstruct against r2r.

Random individual lookup

random_lookup_hs <- microbenchmark(
  rstruct = {
    hs_rstruct[[sample(keys, 1)]]
  },
  r2r = {
    hs_r2r[[sample(keys, 1)]]
  },
  times = 1e3
)

random_lookup_hs
#> Unit: microseconds
#>     expr    min     lq     mean  median      uq      max neval cld
#>  rstruct 29.096 32.392 40.80372 33.5655 35.7250 1125.763  1000  a 
#>      r2r 19.364 21.671 26.25696 22.8875 24.5205  122.012  1000   b

Bulk retrieval

bulk_lookup_hs <- microbenchmark(
  rstruct = {
    hs_rstruct[keys]
  },
  r2r = {
    hs_r2r[keys]
  },
  times = 50
)
#> Unit: milliseconds
#>     expr       min        lq      mean    median        uq        max neval cld
#>  rstruct  1.024232  1.133172  1.460557  1.504055  1.644674   2.894455    50  a 
#>      r2r 81.494306 84.356345 93.083116 90.419758 96.557059 140.492647    50   b

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Benchmarks

Setup

Hash Maps

Random individual lookup

Bulk retrieval

Hash sets

Random individual lookup

Bulk retrieval

FilesExpand file tree

benchmarks.md

Latest commit

History

benchmarks.md

File metadata and controls

Benchmarks

Setup

Hash Maps

Random individual lookup

Bulk retrieval

Hash sets

Random individual lookup

Bulk retrieval