Skip to content
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ export(psislw)
export(relative_eff)
export(scrps)
export(sis)
export(srs_diff_est)
export(stacking_weights)
export(tis)
export(waic)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* Added contribution section. by @VisruthSK in #286
* Update LOO uncertainty paper to use BA doi by @avehtari in #311
* Update documentation for `E_loo()` function by @avehtari in #312
* Export `srs_diff_est()` function by @vinniott and @avehtari in #340


# loo 2.8.0
Expand Down
90 changes: 83 additions & 7 deletions R/loo_subsample.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#' same length containing the posterior density and the approximation density
#' for the individual draws.
#'
#' @seealso [loo()], [psis()], [loo_compare()]
#' @seealso [loo()], [psis()], [loo_compare()], [srs_diff_est()]
#' @template loo-large-data-references
#'
#' @export loo_subsample loo_subsample.function
Expand Down Expand Up @@ -1166,12 +1166,88 @@ loo_subsample_estimation_diff_srs <- function(x) {
update_psis_loo_ss_estimates(x)
}

#' Difference estimation using SRS-WOR sampling (Magnusson et al., 2020)
#' @noRd
#' @param y_approx Approximated values of all observations.
#' @param y The values observed.
#' @param y_idx The index of `y` in `y_approx`.
#' @return A list with estimates.
#' Difference estimator with simple random sampling without replacement.
#'
#' The difference estimator `srs_diff()` estimates
#' the expectation \eqn{n E[y]} when we have \eqn{n} approximate values \eqn{\tilde{y}_i},
#' \eqn{i = 1, \ldots, n} and \eqn{m < n} accurate values \eqn{y_j}, \eqn{j \in \mathcal{S}},
#' where \eqn{m} is the subsample size and \eqn{\mathcal{S}} is
#' a simple random subsample without replacement. The original
#' approach is by Cochran (1977) and we follow the equations 7--9 by
#' Magnusson et al. (2020).
#'
#' @details In Magnusson et al. (2020) Eq (9) first row, the second `+` should
#' be a `-`; Supplementary Material Eq (6) has this correct.
#' As `srs_diff_est()` in the `loo` package is used for \eqn{n E[y]}, there is
#' a proportional difference of \eqn{1/n} compared to the paper.
#'
#' @param y_approx (numeric) `n` approximated values.
#' @param y (numeric) `m<n` subsampled values.
#' @param y_idx (integerish) The index of `y` in `y_approx`.
#'
#' @return A named list containing numeric values:
#' * `y_hat`: estimated mean of \eqn{y} (Eq 7),
#' * `v_y_hat`: variance of the mean estimate (Eq 8), and
#' * `hat_v_y`: estimated variance of \eqn{y} (Eq 9).
#'
#' @references
#' Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020).
#' Leave-One-Out Cross-Validation for Model Comparison in Large Data.
#' In _Proceedings of the 23rd International Conference on Artificial
#' Intelligence and Statistics (AISTATS)_, PMLR 108:341-351.
#'
#' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley.
#'
#' Cortez, P., Cerdeira, A.L., Almeida, F., Matos, T., & Reis, J. (2009).
#' Modeling wine preferences by data mining from physicochemical properties.
#' _Decis. Support Syst._, _47_, 547-553.
#'
#' @seealso [loo_subsample()]
#'
#' @examples
#' ### This example predicts wine quality (data from Cortez et al., 2009).
#' ## The code is commented out for easier installation of the package
#' ## because brm() takes two or three seconds to fit.
#' ## A log_lik_matrix is generated from a fit, then it is used for srs_diff_est().
#' # library(dplyr)
#' # library(brms)
#' # options(brms.backend = "cmdstanr")
#' # options(mc.cores = 4)
#' # library(loo)
#' #
#' # wine <- read.delim(root("winequality-red", "winequality-red.csv"), sep = ";") |>
#' # distinct()
#' #
#' # wine_scaled <- as.data.frame(scale(wine))
#' #
#' # fitos <- brm(ordered(quality) ~ .,
#' # family = cumulative("logit"),
#' # prior = prior(R2D2(mean_R2 = 1/3, prec_R2 = 3)),
#' # data = wine_scaled,
#' # seed = 1,
#' # silent = 2,
#' # refresh = 0)
#' #
#' # log_lik_matrix <- log_lik(fitos)
#' #
#' # N <- nrow(wine_scaled)
#' # Nsub <- 100
#' #
#' # # posterior log-score
#' # lpd <- elpd(log_lik_matrix)
#' # sum(lpd$pointwise[,"elpd"])
#' # # Use PSIS-LOO for subsample of Nsub randomly selected observations
#' # set.seed(1)
#' # idx <- sample(1:N, Nsub)
#' # elpd_loo_sub <- loo(log_lik_matrix[,idx])
#' # sum(elpd_loo_sub$pointwise[,"elpd_loo"]) / Nsub * N
#' #
#' # # Use difference estimator to combine fast result and subsampled accurate result
#' # loo:::srs_diff_est(lpd$pointwise[,"elpd"], elpd_loo_sub$pointwise[,"elpd_loo"], idx)
#' #
#' # # Comparison to using PSIS-LOO for all observations
#' # loo(log_lik_matrix)
#' @export
srs_diff_est <- function(y_approx, y, y_idx) {
checkmate::assert_numeric(y_approx)
checkmate::assert_numeric(y, max.len = length(y_approx))
Expand Down
2 changes: 1 addition & 1 deletion man/loo_subsample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

97 changes: 97 additions & 0 deletions man/srs_diff_est.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading