From 15b74f51474156ed4f57a646314738a8d380ae5a Mon Sep 17 00:00:00 2001 From: Jason Law Date: Tue, 18 Jun 2024 17:03:53 -0700 Subject: [PATCH 1/2] Add methods for formula interface and tidy (tidybayes / broom). --- R/interface.R | 175 +++++++++++++++++++++++++++++++++++++++++++++++ R/tidy-methods.R | 27 ++++++++ 2 files changed, 202 insertions(+) create mode 100644 R/interface.R create mode 100644 R/tidy-methods.R diff --git a/R/interface.R b/R/interface.R new file mode 100644 index 0000000..9e0f584 --- /dev/null +++ b/R/interface.R @@ -0,0 +1,175 @@ +#' @export +spatial_gev <- function(x, ...){ + UseMethod("spatial_gev") +} + +#' @export +spatial_gev.formula <- function( + formula, data, coordinates = ~ x + y, + method = c("laplace", "maxsmooth"), + kernel = c("spde", "matern", "exp"), + reparam_s = c("zero", "unconstrained", "negative", "positive"), + prior = list(), + control = list(), + init = NULL +){ + method <- match.arg(method) + kernel <- match.arg(kernel) + + formula <- Formula::Formula(formula) + formula <- expand_dot(formula) + flen <- length(formula) + stopifnot(identical(flen[1], 1L), flen[2] <= 3 & flen[2]) + + lhs <- rlang::f_lhs(formula) + data <- tidyr::chop(data, lhs) # make data nloc x nvar with a list response + + Xmats <- regression_matrix(formula, data) + + ret <- list( + data = get(lhs, data), + locs = model.frame(coordinates, data), + random = switch(flen[2], "a", "ab", "abc"), + method = method, + reparam_s = reparam_s, + kernel = kernel, + formula = formula, + control = set_control(control), + prior = set_prior(prior) + ) + if(is.null(init)){ + ret$init_param <- init_params( + x = ret$data, + random = Filter(Negate(is.null), lapply(Xmats, ncol)), + kernel = ret$kernel, + use_fgev = TRUE + ) + } + ret <- c(ret, setNames(Xmats, paste0("X_", names(Xmats)))) + mycall <- rlang::call2("spatialGEV_fit", !!!ret[c(1:6,9:12)], !!!ret$control, !!!ret$prior, .ns = "SpatialGEV") + fit <- eval(mycall) + attr(fit, "formula") <- ret$formula + attr(fit, "call") <- mycall + fit +} + +set_prior <- function(prior){ + ret <- list( + nu = 1, + s_prior = NULL, + beta_prior = NULL, + matern_pc_prior = NULL + ) + nms <- names(ret) + nms_set <- names(prior) + ret[nms_set] <- prior + if (length(noNms <- nms_set[!nms_set %in% nms])){ + stop("Unknown control parameters passed: ", paste(noNms, collapse = ", ")) + } + ret +} + +set_control <- function(control){ + ret <- list( + return_levels = 0, + get_return_levels_cov = TRUE, + sp_thres = -1, + adfun_only = FALSE, + ignore_random = FALSE, + get_hessian = TRUE, + silent = TRUE, + mesh_extra_init = list( + a = 0, + log_b = -1, + s = 0.001 + ) + ) + nms <- names(ret) + nms_set <- names(control) + ret[nms_set] <- control + if (length(noNms <- nms_set[!nms_set %in% nms])){ + stop("Unknown control parameters passed: ", paste(noNms, collapse = ", ")) + } + ret +} + +#' @export +init_params <- function(x, random, kernel, use_fgev = TRUE){ + # For each parameter need: + # list("a", "log_b", "s") where length is 1 when not random and nloc when random + # For each random parameter above, need + hyper <- if(identical(kernel, "exp")){ + c("log_sigma", "log_ell") + } else { + c("log_sigma", "log_kappa") + } + random_nms <- names(random) + betas <- paste0("beta_", random_nms) + hyper <- paste0(hyper, "_", rep(random_nms, each = 2)) + + n_loc <- length(x) + x <- unlist(x) + if(use_fgev){ + start <- evd::fgev(x) + start <- unname(mvtnorm::rmvnorm(1, start$estimate, vcov(start))[1, , drop = TRUE]) + } else { + m <- mean(x) + s <- sqrt(6 * var(x)) / pi + start <- c(m - 0.57722 * s, s, 1e-8) + } + + params <- c( + list( + a = rep(start[1], n_loc), + log_b = log(start[2]), + s = start[3] + ), + as.list(setNames(rep(0, length(betas)), betas)), + as.list(setNames(rep(0, length(hyper)), hyper)) + ) + params$beta_a <- rep(params$beta_a, random$a) + if(is.element("b", random_nms)){ + params$log_b <- rep(params$log_b, n_loc) + params$beta_b <- rep(params$beta_b, random$b) + } + if(is.element("s", random_nms)){ + params$s <- rep(params$s, n_loc) + params$beta_s <- rep(params$beta_s, random$s) + } + params +} + +# Formula parsing helpsers ----------------------------------------------------- + +expand_dot <- function(x){ + ret <- attr(terms(x, dot = "previous"), "Formula_without_dot") + ret <- if(is.null(ret)) x else ret + ret +} + +#' Extract design matrix from all RHS random effect terms +#' +#' Extract the design matrix from the data.frame for all RHS random effect terms. +#' For the spatial GEV model here, any RHS terms should consist solely of a single +#' RE term, optionally with regression terms. +regression_matrix <- function(x, data){ + f <- \(i){ + ret <- formula(x, lhs = 0, rhs = i) + ret <- get_design_formula(ret)[[1]] #Only allow one RE term per rhs + model.matrix(ret, data) + } + ret <- setNames(rep(list(NULL), 3), c('a', 'b', 's')) + n <- length(x)[2] + ret[1:n] <- lapply(1:n, f) + ret +} + +#' Extract the regression matrix component of a lme style random effect term +#' i.e., the dots in ( ... | group). Returns a list of formulas with as many values as there +#' are RE terms in the formula. +get_design_formula <- function(x){ + bar <- lme4::findbars(x) + design <- lapply(bar, rlang::call_args) + lapply(design, \(x) eval(call("~", x[[1]]))) +} + diff --git a/R/tidy-methods.R b/R/tidy-methods.R new file mode 100644 index 0000000..0fb764c --- /dev/null +++ b/R/tidy-methods.R @@ -0,0 +1,27 @@ +#' @export +#' @importFrom posterior as_draws_df +as_draws_df.spatialGEVsam <- function(x, ...){ + foo <- function(x){ + matches <- str_match(x, '([abs])([0-9]{1,})') + new <- str_c(matches[,2], "[", matches[,3], "]") + ifelse(is.na(new), x, new) + } + colnames(x$parameter_draws) <- foo(colnames(x$parameter_draws)) + as_draws_df(x$parameter_draws) +} + +#' @export +#' @importFrom broom tidy +tidy.spatialGEVfit <- function(x, effects = c("fixed", "random")){ + effects <- match.arg(effects) + model_summary <- summary(x$report, select = effects) + ret <- as_tibble(model_summary, rownames = "term") + names(ret) <- c("term", "estimate", "std.error", "statistic", "p.value") + if(identical(effects, "random")){ + ret <- ret |> + mutate( + term = sprintf("%s[%s]", term, rownames(x$locs_obs)) + ) + } + ret +} From f85e5ad2c51dc4bddb3c543a520cf2feb9b0607e Mon Sep 17 00:00:00 2001 From: Jason Law Date: Tue, 18 Jun 2024 17:04:19 -0700 Subject: [PATCH 2/2] roxygenize --- .Rbuildignore | 2 ++ .gitignore | 2 +- DESCRIPTION | 7 +++++-- NAMESPACE | 7 +++++++ man/SpatialGEV-package.Rd | 10 +++++++++- man/get_design_formula.Rd | 15 +++++++++++++++ man/regression_matrix.Rd | 13 +++++++++++++ 7 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 man/get_design_formula.Rd create mode 100644 man/regression_matrix.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 5323071..1499958 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^examples$ vignettes/SpatialGEV-vignette.Rmd.orig vignettes/rebuild_vignette.R +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 0c0b068..2d8e1e4 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,10 @@ *.dll *.aux .DS_store - # swap [._]*.s[a-w][a-z] [._]s[a-w][a-z] *~ /doc/ /Meta/ +SpatialGEV.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index 6f5f8b4..2d11c3b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,10 +27,13 @@ Imports: evd, stats, Matrix, - methods + methods, + rlang, + tidyr, + Formula LinkingTo: TMB, RcppEigen -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE) Suggests: INLA, diff --git a/NAMESPACE b/NAMESPACE index 1015fd9..9229712 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,16 @@ # Generated by roxygen2: do not edit by hand +S3method(as_draws_df,spatialGEVsam) S3method(print,spatialGEVfit) S3method(print,spatialGEVpred) S3method(print,spatialGEVsam) +S3method(spatial_gev,formula) S3method(summary,spatialGEVfit) S3method(summary,spatialGEVpred) S3method(summary,spatialGEVsam) +S3method(tidy,spatialGEVfit) export(grid_location) +export(init_params) export(kernel_exp) export(kernel_matern) export(matern_pc_prior) @@ -15,10 +19,13 @@ export(spatialGEV_fit) export(spatialGEV_model) export(spatialGEV_predict) export(spatialGEV_sample) +export(spatial_gev) import(evd) import(mvtnorm) +importFrom(broom,tidy) importFrom(methods,as) importFrom(methods,is) +importFrom(posterior,as_draws_df) importFrom(stats,dist) importFrom(stats,nlminb) importFrom(stats,quantile) diff --git a/man/SpatialGEV-package.Rd b/man/SpatialGEV-package.Rd index bc12ea1..99406b5 100644 --- a/man/SpatialGEV-package.Rd +++ b/man/SpatialGEV-package.Rd @@ -6,7 +6,15 @@ \alias{SpatialGEV-package} \title{SpatialGEV: Fit Spatial Generalized Extreme Value Models} \description{ -Fit latent variable models with the GEV distribution as the data likelihood and the GEV parameters following latent Gaussian processes. The models in this package are built using the template model builder 'TMB' in R, which has the fast ability to integrate out the latent variables using Laplace approximation. This package allows the users to choose in the fit function which GEV parameter(s) is considered as a spatially varying random effect following a Gaussian process, so the users can fit spatial GEV models with different complexities to their dataset without having to write the models in 'TMB' by themselves. This package also offers methods to sample from both fixed and random effects posteriors as well as the posterior predictive distributions at different spatial locations. Methods for fitting this class of models are described in Chen, Ramezan, and Lysy (2021) \href{https://arxiv.org/abs/2110.07051}{arXiv:2110.07051}. +Fit latent variable models with the GEV distribution as the data likelihood and the GEV parameters following latent Gaussian processes. The models in this package are built using the template model builder 'TMB' in R, which has the fast ability to integrate out the latent variables using Laplace approximation. This package allows the users to choose in the fit function which GEV parameter(s) is considered as a spatially varying random effect following a Gaussian process, so the users can fit spatial GEV models with different complexities to their dataset without having to write the models in 'TMB' by themselves. This package also offers methods to sample from both fixed and random effects posteriors as well as the posterior predictive distributions at different spatial locations. Methods for fitting this class of models are described in Chen, Ramezan, and Lysy (2024) \doi{10.48550/arXiv.2110.07051}. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/meixichen/SpatialGEV} + \item Report bugs at \url{https://github.com/meixichen/SpatialGEV/issues} +} + } \author{ \strong{Maintainer}: Meixi Chen \email{meixi.chen@uwaterloo.ca} diff --git a/man/get_design_formula.Rd b/man/get_design_formula.Rd new file mode 100644 index 0000000..d80c7a9 --- /dev/null +++ b/man/get_design_formula.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interface.R +\name{get_design_formula} +\alias{get_design_formula} +\title{Extract the regression matrix component of a lme style random effect term +i.e., the dots in ( ... | group). Returns a list of formulas with as many values as there +are RE terms in the formula.} +\usage{ +get_design_formula(x) +} +\description{ +Extract the regression matrix component of a lme style random effect term +i.e., the dots in ( ... | group). Returns a list of formulas with as many values as there +are RE terms in the formula. +} diff --git a/man/regression_matrix.Rd b/man/regression_matrix.Rd new file mode 100644 index 0000000..67930aa --- /dev/null +++ b/man/regression_matrix.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/interface.R +\name{regression_matrix} +\alias{regression_matrix} +\title{Extract design matrix from all RHS random effect terms} +\usage{ +regression_matrix(x, data) +} +\description{ +Extract the design matrix from the data.frame for all RHS random effect terms. +For the spatial GEV model here, any RHS terms should consist solely of a single +RE term, optionally with regression terms. +}