diff --git a/CHANGELOG.md b/CHANGELOG.md index 10c76509186..1d5165828e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,10 @@ For more information about this file see also [Keep a Changelog](http://keepacha - Support for inspecting and plotting NetCDF output variables within the notebook workflow. - added support for soil temperature, relative humidity, soil moisture, and PPFD downscaling to `met_temporal_downscale.Gaussian_ensemble` - The PEcAn uncertainty analysis tutorial ("Demo 2") has been updated and reimplemented as a Quarto notebook at `documentation/tutorials/Demo_02_Uncertainty_Analysis/uncertainty.qmd`. (#3570) +- Added the shared `input_design` matrix, generated via + `runModule.run.write.configs()`/`generate_joint_ensemble_design()`, that keeps + parameter draws and sampled inputs aligned across `run.write.configs()` and + `write.ensemble.configs()` (#3535, #3634, #3677). ### Fixed diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 18756b45559..31e0a388d29 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -8,7 +8,8 @@ #' #' @param settings a PEcAn settings list #' @param ensemble.size number of ensemble runs -#' @param input_design input indices for samples +#' @param input_design data frame containing the design matrix describing parameter and input indices, as +#' documented in \code{runModule.run.write.configs()}. #' @param write should the runs be written to the database? #' @param posterior.files Filenames for posteriors for drawing samples for ensemble and sensitivity #' analysis (e.g.
post.distns.Rdata, or prior.distns.Rdata) @@ -28,6 +29,15 @@ run.write.configs <- function(settings, ensemble.size, input_design, write = TRUE, posterior.files = rep(NA, length(settings$pfts)), overwrite = TRUE) { + + # Validate that input_design matches ensemble.size + if (nrow(input_design) != ensemble.size) { + stop( + "input_design has ", nrow(input_design), " rows, but ensemble.size is ", + ensemble.size, ".The design matrix must have exactly one row for each run." + ) + } + ## Skip database connection if settings$database is NULL or write is False if (!isTRUE(write) && is.null(settings$database)) { PEcAn.logger::logger.info("Not writing this run to database, so database connection skipped") diff --git a/base/workflow/R/runModule.run.write.configs.R b/base/workflow/R/runModule.run.write.configs.R index 959cde2549c..ae6ef91a90c 100644 --- a/base/workflow/R/runModule.run.write.configs.R +++ b/base/workflow/R/runModule.run.write.configs.R @@ -2,7 +2,14 @@ #' #' @param settings a PEcAn Settings or MultiSettings object #' @param overwrite logical: Replace config files if they already exist? -#' @param input_design the input indices for samples +#' @param input_design data.frame design matrix linking parameter draws and any +#' sampled inputs across runs. Include a `param` column whose values select +#' rows from `trait.samples`/`ensemble.samples` plus optional columns named for +#' `settings$run$inputs` tags (e.g. `met`, `soil`) with index (i.e., row number) +#' into each input's `path` list. Provide at least one row per planned run +#' (median + all SA members and/or `ensemble.size`). Usually generated by +#' `generate_joint_ensemble_design()` but custom designs may be supplied. +#' If NULL, `generate_joint_ensemble_design()` will be called internally. 
#' @return A modified settings object, invisibly #' @importFrom dplyr %>% #' @export @@ -24,6 +31,13 @@ runModule.run.write.configs <- function(settings, ) input_design <- design_result$X } + + # Validate design matrix size for MultiSettings + if (!is.null(settings$ensemble$size) && nrow(input_design) != settings$ensemble$size) { + PEcAn.logger::logger.severe("Input_design has", nrow(input_design), "rows but settings$ensemble$size is", + settings$ensemble$size, ". Design matrix must have exactly one row per run.") + } + return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = FALSE, @@ -41,6 +55,13 @@ runModule.run.write.configs <- function(settings, ) input_design <- design_result$X } + + # Validate design matrix size for Settings + if (!is.null(settings$ensemble$size) && nrow(input_design) != settings$ensemble$size) { + PEcAn.logger::logger.severe("Input_design has", nrow(input_design), "rows but settings$ensemble$size is", + settings$ensemble$size, ". Design matrix must have exactly one row per run.") + } + ensemble_size <- nrow(input_design) diff --git a/base/workflow/man/run.write.configs.Rd b/base/workflow/man/run.write.configs.Rd index 84a33d2596c..e53c6924db0 100644 --- a/base/workflow/man/run.write.configs.Rd +++ b/base/workflow/man/run.write.configs.Rd @@ -18,7 +18,8 @@ run.write.configs( \item{ensemble.size}{number of ensemble runs} -\item{input_design}{input indices for samples} +\item{input_design}{data frame containing the design matrix describing parameter and input indices, as +documented in \code{runModule.run.write.configs()}.} \item{write}{should the runs be written to the database?} diff --git a/base/workflow/man/runModule.run.write.configs.Rd b/base/workflow/man/runModule.run.write.configs.Rd index 9b3341d67c6..3ab40fc4b2c 100644 --- a/base/workflow/man/runModule.run.write.configs.Rd +++ b/base/workflow/man/runModule.run.write.configs.Rd @@ -11,7 +11,14 @@ runModule.run.write.configs(settings, overwrite = TRUE, 
input_design = NULL) \item{overwrite}{logical: Replace config files if they already exist?} -\item{input_design}{the input indices for samples} +\item{input_design}{data.frame design matrix linking parameter draws and any + sampled inputs across runs. Include a `param` column whose values select + rows from `trait.samples`/`ensemble.samples` plus optional columns named for + `settings$run$inputs` tags (e.g. `met`, `soil`) with index (i.e., row number) + into each input's `path` list. Provide at least one row per planned run + (median + all SA members and/or `ensemble.size`). Usually generated by + `generate_joint_ensemble_design()` but custom designs may be supplied. +If NULL, `generate_joint_ensemble_design()` will be called internally.} } \value{ A modified settings object, invisibly diff --git a/book_source/03_topical_pages/03_pecan_xml.Rmd b/book_source/03_topical_pages/03_pecan_xml.Rmd index f99455cf365..86a94c321e4 100644 --- a/book_source/03_topical_pages/03_pecan_xml.Rmd +++ b/book_source/03_topical_pages/03_pecan_xml.Rmd @@ -636,6 +636,35 @@ This information is currently used by the following PEcAn workflow functions: - `PEcAn.<MODEL>::write.configs.<MODEL>` -- See [above](#pecan-write-configs) - `PEcAn.uncertainty::run.sensitivity.analysis` -- Executes the uncertainty analysis +#### Coordinating inputs with the `input_design` design matrix {#xml-input-design} + +Multi-site ensembles that sample over input files use an `input_design` +data.frame to keep parameter draws and input files aligned across runs. The +design is created up front (typically via `generate_joint_ensemble_design()`) +and passed to `runModule.run.write.configs()`. It is not saved automatically to +`samples.Rdata`, so keep your copy if you need to reuse it. + +- **Parameter column:** `param` gives the index (i.e. row number) of the +posterior draw to use for this run. For example, `param = 5` means use the 5th +parameter sample from `samples.Rdata`.
+- **Input columns:** any name that matches a tag under `run/inputs` (for +example `met`, `soil`, `veg`, `poolinitcond`). Values are indices into that +input’s `path` list. Leaving a column out keeps that input fixed across runs. +- **Row count and order:** the design must include exactly one row per run, and + rows are used in run order. For ensembles this means `ensemble.size` rows. + +Example layout (CSV or `data.frame`): + +| param | met | soil | +|------:|----:|-----:| +| 1 | 1 | 1 | +| 2 | 2 | 1 | +| 3 | 1 | 2 | +| 4 | 2 | 2 | + +In this example, run 2 uses the second parameter draw and the second met +driver while keeping the first soil file. + ### Parameter Data Assimilation {#xml-parameter-data-assimilation} The following tags can be used for parameter data assimilation. More detailed information can be found here: [Parameter Data Assimilation Documentation](#pda) diff --git a/modules/uncertainty/R/ensemble.R b/modules/uncertainty/R/ensemble.R index b2807eef320..6863135ea32 100644 --- a/modules/uncertainty/R/ensemble.R +++ b/modules/uncertainty/R/ensemble.R @@ -193,7 +193,11 @@ get.ensemble.samples <- function( ensemble.size, pft.samples, env.samples, ##' Given a pft.xml object, a list of lists as supplied by get.sa.samples, ##' a name to distinguish the output files, and the directory to place the files. ##' -##' @param input_design the input indices for samples +##' @param input_design design matrix describing sampled inputs (see +##' `run.write.configs()`). Columns named after `settings$run$inputs` tags give +##' 1-based indices into each input's `path` list and rows follow run order. +##' Requires `nrow(input_design) >= ensemble.size`; +##' extra rows are ignored.
##' @param ensemble.size size of ensemble ##' @param defaults pft ##' @param ensemble.samples list of lists supplied by \link{get.ensemble.samples} diff --git a/modules/uncertainty/man/write.ensemble.configs.Rd b/modules/uncertainty/man/write.ensemble.configs.Rd index 0545b691418..34decd212ce 100644 --- a/modules/uncertainty/man/write.ensemble.configs.Rd +++ b/modules/uncertainty/man/write.ensemble.configs.Rd @@ -18,7 +18,11 @@ write.ensemble.configs( ) } \arguments{ -\item{input_design}{the input indices for samples} +\item{input_design}{design matrix describing sampled inputs (see +`run.write.configs()`). Columns named after `settings$run$inputs` tags give +1-based indices into each input's `path` list and rows follow run order. +Requires `nrow(input_design) >= ensemble.size`; +extra rows are ignored.} \item{ensemble.size}{size of ensemble}