diff --git a/NEWS.md b/NEWS.md index e9e50e2d..8ce9c8c3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,17 @@ This is also now used in `runSimulation(..., packages)`, when applicable, to ensure package version pass sanity checks before code execution +- Fixed a bug where list-valued `generate`/`analyse` definitions failed on + parallel workers, terminating with `object 'GENERATE_FUNCTIONS' not found` + or `object 'ANALYSE_FUNCTIONS' not found`. When these arguments are lists the + original function lists are stashed in the package-internal environment, which + was not delivered to the workers: the socket/MPI path (`parallel = TRUE`) + exported `ANALYSE_FUNCTIONS` but not `GENERATE_FUNCTIONS`, and the + `parallel = "future"` path exported neither. The `future` worker function is + now wrapped to restore the captured function lists before each replication, + and `GENERATE_FUNCTIONS` is included in the socket cluster export. The serial + (`parallel = FALSE`) path was unaffected + - Fixed a bug where list-based (L'Ecuyer-CMRG) seeds were not applied on the serial (non-parallel) execution path, causing `runArraySimulation(..., iseed)` and `runSimulation(seed = )` to be non-reproducible when diff --git a/R/analysis.R b/R/analysis.R index 8a9bbadb..70e93341 100644 --- a/R/analysis.R +++ b/R/analysis.R @@ -53,6 +53,32 @@ Analysis <- function(Functions, condition, condition.row, replications, fixed_ob if(!is.null(seed)) set_seed(seed) iters <- 1L:replications p <- progressr::progressor(along = iters) + # When generate/analyse are supplied as lists the corresponding + # function lists are stashed in the package-internal .SIMDENV (see + # runSimulation()) and read back by combined_Generate()/combined_Analyses(). + # The SOCK/MPI path exports them to the workers via clusterExport(), but + # future workers start from a freshly loaded package whose .SIMDENV is + # empty. Carry the lists along with each future and restore them into the + # worker's .SIMDENV before the replication runs, so the lookup succeeds + # regardless of the chosen future plan. + if(!is.null(.SIMDENV$GENERATE_FUNCTIONS) || !is.null(.SIMDENV$ANALYSE_FUNCTIONS)){ + used_mainsim <- local({ + FUN <- used_mainsim + GENERATE_FUNCTIONS <- .SIMDENV$GENERATE_FUNCTIONS + ANALYSE_FUNCTIONS <- .SIMDENV$ANALYSE_FUNCTIONS + TRY_ALL_ANALYSE <- .SIMDENV$TRY_ALL_ANALYSE + function(...){ + SIMDENV <- utils::getFromNamespace('.SIMDENV', 'SimDesign') + if(!is.null(GENERATE_FUNCTIONS)) + SIMDENV$GENERATE_FUNCTIONS <- GENERATE_FUNCTIONS + if(!is.null(ANALYSE_FUNCTIONS)){ + SIMDENV$ANALYSE_FUNCTIONS <- ANALYSE_FUNCTIONS + SIMDENV$TRY_ALL_ANALYSE <- TRY_ALL_ANALYSE + } + FUN(...) + } + }) + } results <- try(future.apply::future_lapply(iters, used_mainsim, condition=condition, condition.row=condition.row, diff --git a/R/runSimulation.R b/R/runSimulation.R index fae7b4bf..ea83cc7d 100644 --- a/R/runSimulation.R +++ b/R/runSimulation.R @@ -1205,7 +1205,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, resume <- TRUE } if(!verbose) control$print_RAM <- FALSE - ANALYSE_FUNCTIONS <- TRY_ALL_ANALYSE <- NULL + GENERATE_FUNCTIONS <- ANALYSE_FUNCTIONS <- TRY_ALL_ANALYSE <- NULL if(is.character(parallel)){ useFuture <- tolower(parallel) == 'future' parallel <- TRUE @@ -1234,7 +1234,7 @@ runSimulation <- function(design, replications, generate, analyse, summarise, } else { for(i in 1L:length(generate)) generate[[i]] <- compiler::cmpfun(generate[[i]]) - .SIMDENV$GENERATE_FUNCTIONS <- generate + .SIMDENV$GENERATE_FUNCTIONS <- GENERATE_FUNCTIONS <- generate generate <- combined_Generate for(i in 1L:length(generate)){ char_functions <- deparse(substitute(.SIMDENV$GENERATE_FUNCTIONS[[i]])) @@ -1531,6 +1531,8 @@ runSimulation <- function(design, replications, generate, analyse, summarise, } if(!useFuture){ parallel::clusterExport(cl=cl, export_funs, envir = parent.frame(1L)) + if(!is.null(GENERATE_FUNCTIONS)) + parallel::clusterExport(cl=cl, "GENERATE_FUNCTIONS", envir = environment()) parallel::clusterExport(cl=cl, "ANALYSE_FUNCTIONS", envir = environment()) parallel::clusterExport(cl=cl, "TRY_ALL_ANALYSE", envir = environment()) if(!is.null(prepare)) diff --git a/tests/tests/test-01-core.R b/tests/tests/test-01-core.R index e4b20188..39ff4fda 100644 --- a/tests/tests/test-01-core.R +++ b/tests/tests/test-01-core.R @@ -166,6 +166,63 @@ test_that('SimDesign', { plan(sequential) detach("package:future") + # regression tests: list-valued generate/analyse must work on every backend. + # The function lists are stashed in the package-internal .SIMDENV and read + # back by combined_Generate()/combined_Analyses() on the workers. Two gaps + # caused fatal terminations: + # (1) future workers start from a freshly loaded package whose .SIMDENV is + # empty, so the lists must travel with the futures, otherwise + # "object 'ANALYSE_FUNCTIONS' not found" / "object 'GENERATE_FUNCTIONS' + # not found". + # (2) the SOCK/MPI path exported ANALYSE_FUNCTIONS but not + # GENERATE_FUNCTIONS, so a list-valued generate raised + # "object 'GENERATE_FUNCTIONS' not found" under parallel = TRUE. + listGen_a <- function(condition, fixed_objects) data.frame(y = rnorm(condition$n)) + listGen_b <- function(condition, fixed_objects) data.frame(y = rnorm(condition$n)) + listGenerate <- function(condition, fixed_objects) data.frame(y = rnorm(condition$n)) + listAnalyse_m <- function(condition, dat, fixed_objects) c(m = mean(dat$y)) + listAnalyse_s <- function(condition, dat, fixed_objects) c(s = sd(dat$y)) + listSummarise <- function(condition, results, fixed_objects) colMeans(results) + listDesign <- createDesign(n = c(30, 60)) + + # list-valued analyse + expected_cols <- c("mean.m", "sd.s") + run_alist_sim <- function(...) + runSimulation(listDesign, replications = 4, generate = listGenerate, + analyse = list(mean = listAnalyse_m, sd = listAnalyse_s), + summarise = listSummarise, verbose = FALSE, ...) + # list-valued generate + run_glist_sim <- function(...) + runSimulation(listDesign, replications = 4, + generate = list(G1 = listGen_a, G2 = listGen_b), + analyse = listAnalyse_m, summarise = listSummarise, + verbose = FALSE, ...) + + res <- run_alist_sim(parallel = FALSE) + expect_null(res$FATAL_TERMINATION) + expect_true(all(expected_cols %in% names(res))) + res <- run_glist_sim(parallel = FALSE) + expect_null(res$FATAL_TERMINATION) + expect_true("m" %in% names(res)) + + res <- run_alist_sim(parallel = TRUE, ncores = 2L) + expect_null(res$FATAL_TERMINATION) + expect_true(all(expected_cols %in% names(res))) + res <- run_glist_sim(parallel = TRUE, ncores = 2L) + expect_null(res$FATAL_TERMINATION) + expect_true("m" %in% names(res)) + + suppressPackageStartupMessages(suppressWarnings(library(future))) + plan(multisession, workers = 2L) + res <- run_alist_sim(parallel = "future") + expect_null(res$FATAL_TERMINATION) + expect_true(all(expected_cols %in% names(res))) + res <- run_glist_sim(parallel = "future") + expect_null(res$FATAL_TERMINATION) + expect_true("m" %in% names(res)) + plan(sequential) + detach("package:future") + Final <- runSimulation(Design, generate=mysim, analyse=mycompute, summarise=mycollect, replications = 2, parallel=FALSE, save=FALSE, verbose = FALSE, store_results = TRUE)