Adding Progress Bar to '*apply' Functions
Adding progress bar to *apply
functions, possibly leveraging
parallel processing.
pblapply(X, FUN, ..., cl = NULL) pbapply(X, MARGIN, FUN, ..., cl = NULL) pbsapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE, cl = NULL) pbreplicate(n, expr, simplify = "array", cl = NULL) pbmapply(FUN, ..., MoreArgs = NULL, SIMPLIFY = TRUE, USE.NAMES = TRUE) pbtapply(X, INDEX, FUN = NULL, ..., default = NA, simplify = TRUE, cl = NULL)
X |
For |
MARGIN |
A vector giving the subscripts which the function will be applied over.
|
FUN |
The function to be applied to each element of |
... |
Optional arguments to |
simplify, SIMPLIFY |
Logical; should the result be simplified to a vector or matrix if possible?
|
USE.NAMES |
Logical; if |
n |
Number of replications. |
expr |
Expression (language object, usually a call) to evaluate repeatedly. |
cl |
A cluster object created by |
MoreArgs |
a list of other arguments to |
INDEX |
a |
default |
(only in the case of simplification to an array) the value with which the array
is initialized as |
The behaviour of the progress bar is controlled by the option
type
in pboptions
,
it can take values c("txt", "win", "tk", "none",)
on Windows,
and c("txt", "tk", "none",)
on Unix systems.
Other options have elements that are arguments used in the functions
timerProgressBar
, txtProgressBar
,
and tkProgressBar
.
See pboptions
for how to conveniently set these.
Parallel processing can be enabled through the cl
argument.
parLapply
is called when cl
is a 'cluster' object,
mclapply
is called when cl
is an integer.
Showing the progress bar increases the communication overhead
between the main process and nodes / child processes compared to the
parallel equivalents of the functions without the progress bar.
The functions fall back to their original equivalents when the progress bar is
disabled (i.e. getOption("pboptions")$type == "none"
or dopb()
is
FALSE
). This is the default when interactive()
if FALSE
(i.e. called from command line R script).
When doing parallel processing, other objects might need to pushed to the workers, and random numbers must be handled with care (see Examples).
Updating the progress bar with mclapply
can be slightly slower compared to using a Fork cluster
(i.e. calling makeForkCluster
).
Care must be taken to set appropriate random numbers in this case.
Similar to the value returned by the standard *apply
functions.
A progress bar is showed as a side effect.
Progress bar can add an overhead to the computation.
Peter Solymos <solymos@ualberta.ca>
Progress bars used in the functions:
txtProgressBar
,
tkProgressBar
,
timerProgressBar
Setting the options: pboptions
## --- simple linear model simulation --- set.seed(1234) n <- 200 x <- rnorm(n) y <- rnorm(n, crossprod(t(model.matrix(~ x)), c(0, 1)), sd = 0.5) d <- data.frame(y, x) ## model fitting and bootstrap mod <- lm(y ~ x, d) ndat <- model.frame(mod) B <- 100 bid <- sapply(1:B, function(i) sample(nrow(ndat), nrow(ndat), TRUE)) fun <- function(z) { if (missing(z)) z <- sample(nrow(ndat), nrow(ndat), TRUE) coef(lm(mod$call$formula, data=ndat[z,])) } ## standard '*apply' functions system.time(res1 <- lapply(1:B, function(i) fun(bid[,i]))) system.time(res2 <- sapply(1:B, function(i) fun(bid[,i]))) system.time(res3 <- apply(bid, 2, fun)) system.time(res4 <- replicate(B, fun())) ## 'pb*apply' functions ## try different settings: ## "none", "txt", "tk", "win", "timer" op <- pboptions(type = "timer") # default system.time(res1pb <- pblapply(1:B, function(i) fun(bid[,i]))) pboptions(op) pboptions(type = "txt") system.time(res2pb <- pbsapply(1:B, function(i) fun(bid[,i]))) pboptions(op) pboptions(type = "txt", style = 1, char = "=") system.time(res3pb <- pbapply(bid, 2, fun)) pboptions(op) pboptions(type = "txt", char = ":") system.time(res4pb <- pbreplicate(B, fun())) pboptions(op) ## Not run: ## parallel evaluation using the parallel package ## (n = 2000 and B = 1000 will give visible timing differences) library(parallel) cl <- makeCluster(2L) clusterExport(cl, c("fun", "mod", "ndat", "bid")) ## parallel with no progress bar: snow type cluster ## (RNG is set in the main process to define the object bid) system.time(res1cl <- parLapply(cl = cl, 1:B, function(i) fun(bid[,i]))) system.time(res2cl <- parSapply(cl = cl, 1:B, function(i) fun(bid[,i]))) system.time(res3cl <- parApply(cl, bid, 2, fun)) ## parallel with progress bar: snow type cluster ## (RNG is set in the main process to define the object bid) system.time(res1pbcl <- pblapply(1:B, function(i) fun(bid[,i]), cl = cl)) system.time(res2pbcl <- pbsapply(1:B, function(i) fun(bid[,i]), cl = cl)) ## (RNG needs to be set when not using bid) parallel::clusterSetRNGStream(cl, iseed = 0L) system.time(res4pbcl <- pbreplicate(B, fun(), cl = cl)) system.time(res3pbcl <- pbapply(bid, 2, fun, cl = cl)) stopCluster(cl) if (.Platform$OS.type != "windows") { ## parallel with no progress bar: multicore type forking ## (mc.set.seed = TRUE in parallel::mclapply by default) system.time(res2mc <- mclapply(1:B, function(i) fun(bid[,i]), mc.cores = 2L)) ## parallel with progress bar: multicore type forking ## (mc.set.seed = TRUE in parallel::mclapply by default) system.time(res1pbmc <- pblapply(1:B, function(i) fun(bid[,i]), cl = 2L)) system.time(res2pbmc <- pbsapply(1:B, function(i) fun(bid[,i]), cl = 2L)) system.time(res4pbmc <- pbreplicate(B, fun(), cl = 2L)) } ## End(Not run) ## --- Examples taken from standard '*apply' functions --- ## --- sapply, lapply, and replicate --- require(stats); require(graphics) x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE)) # compute the list mean for each list element pblapply(x, mean) # median and quartiles for each list element pblapply(x, quantile, probs = 1:3/4) pbsapply(x, quantile) i39 <- sapply(3:9, seq) # list of vectors pbsapply(i39, fivenum) ## sapply(*, "array") -- artificial example (v <- structure(10*(5:8), names = LETTERS[1:4])) f2 <- function(x, y) outer(rep(x, length.out = 3), y) (a2 <- pbsapply(v, f2, y = 2*(1:5), simplify = "array")) hist(pbreplicate(100, mean(rexp(10)))) ## use of replicate() with parameters: foo <- function(x = 1, y = 2) c(x, y) # does not work: bar <- function(n, ...) replicate(n, foo(...)) bar <- function(n, x) pbreplicate(n, foo(x = x)) bar(5, x = 3) ## --- apply --- ## Compute row and column sums for a matrix: x <- cbind(x1 = 3, x2 = c(4:1, 2:5)) dimnames(x)[[1]] <- letters[1:8] pbapply(x, 2, mean, trim = .2) col.sums <- pbapply(x, 2, sum) row.sums <- pbapply(x, 1, sum) rbind(cbind(x, Rtot = row.sums), Ctot = c(col.sums, sum(col.sums))) stopifnot( pbapply(x, 2, is.vector)) ## Sort the columns of a matrix pbapply(x, 2, sort) ## keeping named dimnames names(dimnames(x)) <- c("row", "col") x3 <- array(x, dim = c(dim(x),3), dimnames = c(dimnames(x), list(C = paste0("cop.",1:3)))) identical(x, pbapply( x, 2, identity)) identical(x3, pbapply(x3, 2:3, identity)) ##- function with extra args: cave <- function(x, c1, c2) c(mean(x[c1]), mean(x[c2])) pbapply(x, 1, cave, c1 = "x1", c2 = c("x1","x2")) ma <- matrix(c(1:4, 1, 6:8), nrow = 2) ma pbapply(ma, 1, table) #--> a list of length 2 pbapply(ma, 1, stats::quantile) # 5 x n matrix with rownames stopifnot(dim(ma) == dim(pbapply(ma, 1:2, sum))) ## Example with different lengths for each call z <- array(1:24, dim = 2:4) zseq <- pbapply(z, 1:2, function(x) seq_len(max(x))) zseq ## a 2 x 3 matrix typeof(zseq) ## list dim(zseq) ## 2 3 zseq[1,] pbapply(z, 3, function(x) seq_len(max(x))) # a list without a dim attribute ## --- mapply --- pbmapply(rep, 1:4, 4:1) pbmapply(rep, times = 1:4, x = 4:1) pbmapply(rep, times = 1:4, MoreArgs = list(x = 42)) pbmapply(function(x, y) seq_len(x) + y, c(a = 1, b = 2, c = 3), # names from first c(A = 10, B = 0, C = -10)) word <- function(C, k) paste(rep.int(C, k), collapse = "") utils::str(pbmapply(word, LETTERS[1:6], 6:1, SIMPLIFY = FALSE)) ## --- tapply --- require(stats) groups <- as.factor(rbinom(32, n = 5, prob = 0.4)) pbtapply(groups, groups, length) #- is almost the same as table(groups) ## contingency table from data.frame : array with named dimnames pbtapply(warpbreaks$breaks, warpbreaks[,-1], sum) pbtapply(warpbreaks$breaks, warpbreaks[, 3, drop = FALSE], sum) n <- 17; fac <- factor(rep_len(1:3, n), levels = 1:5) table(fac) pbtapply(1:n, fac, sum) pbtapply(1:n, fac, sum, default = 0) # maybe more desirable pbtapply(1:n, fac, sum, simplify = FALSE) pbtapply(1:n, fac, range) pbtapply(1:n, fac, quantile) pbtapply(1:n, fac, length) ## NA's pbtapply(1:n, fac, length, default = 0) # == table(fac) ## example of ... argument: find quarterly means pbtapply(presidents, cycle(presidents), mean, na.rm = TRUE) ind <- list(c(1, 2, 2), c("A", "A", "B")) table(ind) pbtapply(1:3, ind) #-> the split vector pbtapply(1:3, ind, sum) ## Some assertions (not held by all patch propsals): nq <- names(quantile(1:5)) stopifnot( identical(pbtapply(1:3, ind), c(1L, 2L, 4L)), identical(pbtapply(1:3, ind, sum), matrix(c(1L, 2L, NA, 3L), 2, dimnames = list(c("1", "2"), c("A", "B")))), identical(pbtapply(1:n, fac, quantile)[-1], array(list(`2` = structure(c(2, 5.75, 9.5, 13.25, 17), .Names = nq), `3` = structure(c(3, 6, 9, 12, 15), .Names = nq), `4` = NULL, `5` = NULL), dim=4, dimnames=list(as.character(2:5)))))
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.