By default, reads and writes are performed sequentially (i.e., not in parallel). Users can opt in to parallel read/write functionality via `options()`.
library(pizzarr)
# Make sure every optional package used by the examples below is available.
# pbapply drives the parallel reads/writes; future and future.apply are only
# needed by the "future" backend examples (pbapply warns with
# "install.packages('future.apply')" when that package is missing).
for (pkg in c("pbapply", "future", "future.apply")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
Simulate slow operations
# DirectoryStore subclass used to make the benefit of parallelism visible:
# every get_item()/set_item() call pauses for half a second before delegating
# to the parent implementation.
SlowDirectoryStore <- R6::R6Class(
  classname = "SlowDirectoryStore",
  inherit = DirectoryStore,
  public = list(
    # Delayed read: sleep, then hand `key` to the parent store unchanged and
    # return whatever the parent returns.
    get_item = function(key) {
      Sys.sleep(0.5) # Artificial read latency.
      super$get_item(key)
    },
    # Delayed write: sleep, then hand `key` and `value` to the parent store
    # unchanged and return whatever the parent returns.
    set_item = function(key, value) {
      Sys.sleep(0.5) # Artificial write latency.
      super$set_item(key, value)
    }
  )
)
Read in parallel
Provide an integer >= 2 as the value of the option to use forking-based parallelism. This value is passed to the `cl` parameter of `pbapply::pblapply()`.
# Enable parallel reads; the integer is handed to pbapply as `cl`.
options(pizzarr.parallel_read_enabled = 4)

# Open the array at path "/0" of the sample dataset through the slow store.
root <- pizzarr_sample("dog.ome.zarr")
store <- SlowDirectoryStore$new(root)
zarr_arr <- zarr_open(store = store, path = "/0")

# Fetch the "..." selection and keep only its data payload.
selection <- zarr_arr$get_item("...")
arr <- selection$data
sum(arr)
#> [1] 134538481
Write in parallel
# Enable parallel writes; the integer is handed to pbapply as `cl`.
options(pizzarr.parallel_write_enabled = 4)

# Open the array at path "/0" of the sample dataset through the slow store.
root <- pizzarr_sample("dog.ome.zarr")
store <- SlowDirectoryStore$new(root)
zarr_arr <- zarr_open(store = store, path = "/0")

# Read the current contents, then write back every value doubled.
original <- zarr_arr$get_item("...")
arr <- original$data
zarr_arr$set_item("...", arr * 2.0)
#> NULL

# Read again to confirm the write: the sum is twice the original sum.
reread <- zarr_arr$get_item("...")
doubled_arr <- reread$data
sum(doubled_arr)
#> [1] 269076962
Parallel operations with future backend
To use the `future` backend for `pbapply`, set the value of the option to the string `"future"`.
Cluster-based:
# Route pizzarr's parallel reads through pbapply's "future" backend.
options(pizzarr.parallel_read_enabled = "future")

# Back the future plan with an explicitly created two-worker cluster.
cl <- parallel::makeCluster(2)
future::plan(future::cluster, workers = cl)

# Open the array at path "/0" of the sample dataset through the slow store.
root <- pizzarr_sample("dog.ome.zarr")
store <- SlowDirectoryStore$new(root)
zarr_arr <- zarr_open(store = store, path = "/0")
arr <- zarr_arr$get_item("...")$data
#> Loading required namespace: future.apply
#> Warning in apply_func(parts, function(proj, cl = NA) {: You need some packages
#> for cl='future' to work: install.packages('future.apply')
# NOTE(review): the warning above shows future.apply was not installed when
# this output was recorded; install it for the future backend to be used.
sum(arr)
#> [1] 134538481

# Switch the plan back to sequential before shutting the cluster down, so the
# active plan never refers to workers that no longer exist.
future::plan(future::sequential)
parallel::stopCluster(cl)
Multisession-based:
# Route pizzarr's parallel reads through pbapply's "future" backend.
options(pizzarr.parallel_read_enabled = "future")

# Let future manage four background R sessions itself.
future::plan(future::multisession, workers = 4)

# Open the array at path "/0" of the sample dataset through the slow store.
root <- pizzarr_sample("dog.ome.zarr")
store <- SlowDirectoryStore$new(root)
zarr_arr <- zarr_open(store = store, path = "/0")
arr <- zarr_arr$get_item("...")$data
#> Loading required namespace: future.apply
#> Warning in apply_func(parts, function(proj, cl = NA) {: You need some packages
#> for cl='future' to work: install.packages('future.apply')
# NOTE(review): the warning above shows future.apply was not installed when
# this output was recorded; install it for the future backend to be used.
sum(arr)
#> [1] 134538481

# Release the background sessions once the example is done; switching back to
# the sequential plan shuts the multisession workers down.
future::plan(future::sequential)
Sequential operations
To return to sequential mode, run:
# Turn off both parallel code paths so reads and writes run sequentially again.
options(pizzarr.parallel_read_enabled = FALSE)
options(pizzarr.parallel_write_enabled = FALSE)