From a2db12074c0a9fd037db560bdf70bb9c701009f5 Mon Sep 17 00:00:00 2001 From: vinniott Date: Tue, 17 Mar 2026 19:58:14 +0100 Subject: [PATCH 01/13] set up documentation structure --- NAMESPACE | 1 + R/loo_subsample.R | 25 ++++++++++++++++++++++--- man/srs_diff_est.Rd | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 man/srs_diff_est.Rd diff --git a/NAMESPACE b/NAMESPACE index 3405d737..05ffb3d1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -154,6 +154,7 @@ export(psislw) export(relative_eff) export(scrps) export(sis) +export(srs_diff_est) export(stacking_weights) export(tis) export(waic) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index bcac4b17..1ba59ae9 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1166,12 +1166,31 @@ loo_subsample_estimation_diff_srs <- function(x) { update_psis_loo_ss_estimates(x) } -#' Difference estimation using SRS-WOR sampling (Magnusson et al., 2020) -#' @noRd +#' @title Difference estimation using SRS-WOR sampling +#' +#' @description This paragraph describes the function. +#' +#' @return A list with estimates. Function 9 of Magnusson et al. (2020). +#' +#' @details +#' Implements Equations 7-9 of Magnusson et al. (2020). +#' +#' #' @param y_approx Approximated values of all observations. #' @param y The values observed. #' @param y_idx The index of `y` in `y_approx`. -#' @return A list with estimates. +#' +#' @references +#' Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). +#' Leave-One-Out Cross-Validation for Model Comparison in Large Data. +#' In _Proceedings of the 23rd International Conference on Artificial +#' Intelligence and Statistics (AISTATS)_, PMLR 108:341-351. 
+#' +#' @seealso [loo_subsample()] +#' @export +#' +#' @examples +#' print(42) srs_diff_est <- function(y_approx, y, y_idx) { checkmate::assert_numeric(y_approx) checkmate::assert_numeric(y, max.len = length(y_approx)) diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd new file mode 100644 index 00000000..0fdc5ce7 --- /dev/null +++ b/man/srs_diff_est.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/loo_subsample.R +\name{srs_diff_est} +\alias{srs_diff_est} +\title{Difference estimation using SRS-WOR sampling} +\usage{ +srs_diff_est(y_approx, y, y_idx) +} +\arguments{ +\item{y_approx}{Approximated values of all observations.} + +\item{y}{The values observed.} + +\item{y_idx}{The index of \code{y} in \code{y_approx}.} +} +\value{ +A list with estimates. +} +\description{ +This paragraph describes the function. +} +\details{ +Implements Equations 7-9 of Magnusson et al. (2020). +} +\examples{ +print(42) +} +\references{ +Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). +Leave-One-Out Cross-Validation for Model Comparison in Large Data. +In \emph{Proceedings of the 23rd International Conference on Artificial +Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. +} +\code{\link[=loo_subsample]{loo_subsample()}} +} From 5fe5c340bcf3dc2019f84079f47322782772c012 Mon Sep 17 00:00:00 2001 From: vinniott Date: Tue, 17 Mar 2026 20:01:34 +0100 Subject: [PATCH 02/13] srs_diff_est.Rd matches .R documentation --- man/srs_diff_est.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index 0fdc5ce7..175c2356 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -14,7 +14,7 @@ srs_diff_est(y_approx, y, y_idx) \item{y_idx}{The index of \code{y} in \code{y_approx}.} } \value{ -A list with estimates. +A list with estimates. Function 9 of Magnusson et al. (2020). } \description{ This paragraph describes the function. 
@@ -31,5 +31,6 @@ Leave-One-Out Cross-Validation for Model Comparison in Large Data. In \emph{Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. } +\seealso{ \code{\link[=loo_subsample]{loo_subsample()}} } From 6e8862d8f641d455ed29669382c70484472d9fac Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 22 Mar 2026 11:53:51 +0100 Subject: [PATCH 03/13] added documentation as proposed by @avehtari in issue #333 --- R/loo_subsample.R | 27 +++++++++++++++++++-------- man/srs_diff_est.Rd | 28 ++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 1ba59ae9..48b95e1b 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1166,19 +1166,30 @@ loo_subsample_estimation_diff_srs <- function(x) { update_psis_loo_ss_estimates(x) } -#' @title Difference estimation using SRS-WOR sampling +#' Difference estimator with simple random sampling without replacement. #' -#' @description This paragraph describes the function. +#' The difference estimator `srs_diff_est()` estimates +#' the expectation $nE[y]$ when we have $n$ approximate values $\tilde{y}_i$, +#' $i=1,\ldots,n$ and $m Date: Sun, 22 Mar 2026 11:56:36 +0100 Subject: [PATCH 04/13] added @seealso at loo_subsample() --- R/loo_subsample.R | 2 +- man/loo_subsample.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 48b95e1b..9a1143d7 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -24,7 +24,7 @@ #' same length containing the posterior density and the approximation density #' for the individual draws. 
#' -#' @seealso [loo()], [psis()], [loo_compare()] +#' @seealso [loo()], [psis()], [loo_compare()], [srs_diff_est()] #' @template loo-large-data-references #' #' @export loo_subsample loo_subsample.function diff --git a/man/loo_subsample.Rd b/man/loo_subsample.Rd index 6f381db6..5889be1e 100644 --- a/man/loo_subsample.Rd +++ b/man/loo_subsample.Rd @@ -196,5 +196,5 @@ In \emph{Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. } \seealso{ -\code{\link[=loo]{loo()}}, \code{\link[=psis]{psis()}}, \code{\link[=loo_compare]{loo_compare()}} +\code{\link[=loo]{loo()}}, \code{\link[=psis]{psis()}}, \code{\link[=loo_compare]{loo_compare()}}, \code{\link[=srs_diff_est]{srs_diff_est()}} } From 6f485ac5165e4041dbac49358ceae68d7dccdf30 Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 22 Mar 2026 11:59:29 +0100 Subject: [PATCH 05/13] added reference Cochran (1977) --- R/loo_subsample.R | 2 ++ man/srs_diff_est.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 9a1143d7..60261e58 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1197,6 +1197,8 @@ loo_subsample_estimation_diff_srs <- function(x) { #' In _Proceedings of the 23rd International Conference on Artificial #' Intelligence and Statistics (AISTATS)_, PMLR 108:341-351. #' +#' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley. +#' #' @seealso [loo_subsample()] #' @export #' diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index 1f6843d5..a82b35e5 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -46,6 +46,8 @@ Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). Leave-One-Out Cross-Validation for Model Comparison in Large Data. In \emph{Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. + +Cochran, W. G. (1977). \emph{Sampling Techniques, 3rd Edition}. John Wiley. 
} \seealso{ \code{\link[=loo_subsample]{loo_subsample()}} From 67275cf92ae6cbb103e72ec0379326617787762c Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 22 Mar 2026 12:01:20 +0100 Subject: [PATCH 06/13] removed outdated @return duplicate --- R/loo_subsample.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 60261e58..788e4bb2 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1176,8 +1176,6 @@ loo_subsample_estimation_diff_srs <- function(x) { #' approach is by Cochran (1977) and we follow the equations 7--9 by #' Magnusson et al. (2020). #' -#' @return A list with estimates. Function 9 of Magnusson et al. (2020). -#' #' @details Magnusson et al. (2020) Eq (9) first row second `+` should #' be `-`. Supplementary material Eq (6) has this correct. #' As `srs_diff_est()` in `loo` package is used for $nE[y]$, there is @@ -1186,6 +1184,7 @@ loo_subsample_estimation_diff_srs <- function(x) { #' @param y_approx (numeric) `n` approximated values. #' @param y (numeric) `m Date: Sun, 22 Mar 2026 12:14:14 +0100 Subject: [PATCH 07/13] corrected .R formulas to render in .Rd --- R/loo_subsample.R | 18 +++++++++--------- man/srs_diff_est.Rd | 20 +++++++++----------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 788e4bb2..1805cd36 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1169,26 +1169,26 @@ loo_subsample_estimation_diff_srs <- function(x) { #' Difference estimator with simple random sampling without replacement. 
#' #' The difference estimator `srs_diff_est()` estimates -#' the expectation $nE[y]$ when we have $n$ approximate values $\tilde{y}_i$, -#' $i=1,\ldots,n$ and $m Date: Sun, 22 Mar 2026 12:15:25 +0100 Subject: [PATCH 08/13] removed example placeholder --- R/loo_subsample.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 1805cd36..24e042a9 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1200,9 +1200,6 @@ loo_subsample_estimation_diff_srs <- function(x) { #' #' @seealso [loo_subsample()] #' @export -#' -#' @examples -#' print(42) srs_diff_est <- function(y_approx, y, y_idx) { checkmate::assert_numeric(y_approx) checkmate::assert_numeric(y, max.len = length(y_approx)) From 9699240759b8bcb5fcbbfb241e13a322e13efc04 Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 22 Mar 2026 12:17:03 +0100 Subject: [PATCH 09/13] updated .Rd to match .R --- man/srs_diff_est.Rd | 3 --- 1 file changed, 3 deletions(-) diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index 274a7e0e..6f49e9c5 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -36,9 +36,6 @@ be a \code{-}; Supplementary Material Eq (6) has this correct. As \code{srs_diff_est()} in the \code{loo} package is used for \eqn{n E[y]}, there is a proportional difference of \eqn{1/n} compared to the paper. } -\examples{ -print(42) -} \references{ Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). Leave-One-Out Cross-Validation for Model Comparison in Large Data. From c0a5447cc0f7f9f842804ed67fdeaadcc687e74b Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 22 Mar 2026 12:23:51 +0100 Subject: [PATCH 10/13] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 6f182376..30903c38 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,6 +17,7 @@ * Added contribution section. 
by @VisruthSK in #286 * Update LOO uncertainty paper to use BA doi by @avehtari in #311 * Update documentation for `E_loo()` function by @avehtari in #312 +* Export `srs_diff_est()` function by @vinniott in #340 # loo 2.8.0 From 94fb75799a419fabcb6cbfbeee54e45388579cc6 Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 26 Apr 2026 11:00:39 +0200 Subject: [PATCH 11/13] added @examples placeholder --- R/loo_subsample.R | 5 +++++ man/srs_diff_est.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 24e042a9..0cb30f20 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1199,6 +1199,11 @@ loo_subsample_estimation_diff_srs <- function(x) { #' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley. #' #' @seealso [loo_subsample()] +#' +#' @examples +#' print("Hello, World!") +#' +#' #' @export srs_diff_est <- function(y_approx, y, y_idx) { checkmate::assert_numeric(y_approx) diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index 6f49e9c5..9dded25f 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -35,6 +35,11 @@ In Magnusson et al. (2020) Eq (9) first row, the second \code{+} should be a \code{-}; Supplementary Material Eq (6) has this correct. As \code{srs_diff_est()} in the \code{loo} package is used for \eqn{n E[y]}, there is a proportional difference of \eqn{1/n} compared to the paper. +} +\examples{ +print("Hello, World!") + + } \references{ Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020). 
From 535f464f6105e1a359a1dad647774a75aa4560d4 Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 26 Apr 2026 11:29:30 +0200 Subject: [PATCH 12/13] added generation of wine example --- R/loo_subsample.R | 29 ++++++++++++++++++++++++++++- man/srs_diff_est.Rd | 30 +++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 0cb30f20..8473bb07 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1198,10 +1198,37 @@ loo_subsample_estimation_diff_srs <- function(x) { #' #' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley. #' +#' Cortez, P., Cerdeira, A.L., Almeida, F., Matos, T., & Reis, J. (2009). +#' Modeling wine preferences by data mining from physicochemical properties. +#' _Decis. Support Syst._, _47_, 547-553. +#' #' @seealso [loo_subsample()] #' #' @examples -#' print("Hello, World!") +#' ### This example predicts wine quality (data from Cortez et al., 2009). +#' ## First, commented out code shows to generate a loglik_matrix. +#' ## Second, running code illustrates how to use srs_diff_est(). 
+#' # library(dplyr) +#' # library(brms) +#' # options(brms.backend = "cmdstanr") +#' # options(mc.cores = 4) +#' # library(loo) +#' # +#' # wine <- read.delim(root("winequality-red", "winequality-red.csv"), sep = ";") |> +#' # distinct() +#' # +#' # wine_scaled <- as.data.frame(scale(wine)) +#' # +#' # fitos <- brm(ordered(quality) ~ ., +#' # family = cumulative("logit"), +#' # prior = prior(R2D2(mean_R2 = 1/3, prec_R2 = 3)), +#' # data = wine_scaled, +#' # seed = 1, +#' # silent = 2, +#' # refresh = 0) +#' # +#' # wine_loglik_matrix <- log_lik(fitos) +#' wine_loglik_matrix <- example_wine_loglik_matrix() # Installed with loo to save time of fitting model shown above #' #' #' @export diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index 9dded25f..c481d095 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -37,7 +37,31 @@ As \code{srs_diff_est()} in the \code{loo} package is used for \eqn{n E[y]}, the a proportional difference of \eqn{1/n} compared to the paper. } \examples{ -print("Hello, World!") +### This example predicts wine quality (data from Cortez et al., 2009). +## The following, commented out code shows how you to fit the model +## to generate the wine_log_lik_matrix. +## The matrix is part of loo as wine_log_lik_matrix() +# library(dplyr) +# library(brms) +# options(brms.backend = "cmdstanr") +# options(mc.cores = 4) +# library(loo) +# +# wine <- read.delim(root("winequality-red", "winequality-red.csv"), sep = ";") |> +# distinct() +# +# wine_scaled <- as.data.frame(scale(wine)) +# +# fitos <- brm(ordered(quality) ~ ., +# family = cumulative("logit"), +# prior = prior(R2D2(mean_R2 = 1/3, prec_R2 = 3)), +# data = wine_scaled, +# seed = 1, +# silent = 2, +# refresh = 0) +# +# wine_log_lik_matrix <- log_lik(fitos) +wine_log_lik_matrix <- "hi" } @@ -48,6 +72,10 @@ In \emph{Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, PMLR 108:341-351. Cochran, W. G. (1977). 
\emph{Sampling Techniques, 3rd Edition}. John Wiley. + +Cortez, P., Cerdeira, A.L., Almeida, F., Matos, T., & Reis, J. (2009). +Modeling wine preferences by data mining from physicochemical properties. +\emph{Decis. Support Syst.}, \emph{47}, 547-553. } \seealso{ \code{\link[=loo_subsample]{loo_subsample()}} From 30cc34e539c36d6a6f0df682016936a95657510e Mon Sep 17 00:00:00 2001 From: vinniott Date: Sun, 10 May 2026 15:39:00 +0200 Subject: [PATCH 13/13] added full example --- NEWS.md | 2 +- R/loo_subsample.R | 28 ++++++++++++++++++++++------ man/srs_diff_est.Rd | 29 ++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index 30903c38..8ebccfd6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,7 +17,7 @@ * Added contribution section. by @VisruthSK in #286 * Update LOO uncertainty paper to use BA doi by @avehtari in #311 * Update documentation for `E_loo()` function by @avehtari in #312 -* Export `srs_diff_est()` function by @vinniott in #340 +* Export `srs_diff_est()` function by @vinniott and @avehtari in #340 # loo 2.8.0 diff --git a/R/loo_subsample.R b/R/loo_subsample.R index 8473bb07..a5c44141 100644 --- a/R/loo_subsample.R +++ b/R/loo_subsample.R @@ -1206,8 +1206,9 @@ loo_subsample_estimation_diff_srs <- function(x) { #' #' @examples #' ### This example predicts wine quality (data from Cortez et al., 2009). -#' ## First, commented out code shows to generate a loglik_matrix. -#' ## Second, running code illustrates how to use srs_diff_est(). +#' ## The code is commented out for easier installation of the package +#' ## because brm() takes two or three seconds to fit. +#' ## A log_lik_matrix is generated from a fit, then it is used for srs_diff_est(). 
#' # library(dplyr) #' # library(brms) #' # options(brms.backend = "cmdstanr") #' # options(mc.cores = 4) #' # library(loo) #' # @@ -1227,10 +1228,25 @@ loo_subsample_estimation_diff_srs <- function(x) { #' # silent = 2, #' # refresh = 0) #' # -#' # wine_loglik_matrix <- log_lik(fitos) -#' wine_loglik_matrix <- example_wine_loglik_matrix() # Installed with loo to save time of fitting model shown above -#' -#' +#' # log_lik_matrix <- log_lik(fitos) +#' # +#' # N <- nrow(wine_scaled) +#' # Nsub <- 100 +#' # +#' # # posterior log-score +#' # lpd <- elpd(log_lik_matrix) +#' # sum(lpd$pointwise[,"elpd"]) +#' # # Use PSIS-LOO for subsample of Nsub randomly selected observations +#' # set.seed(1) +#' # idx <- sample(1:N, Nsub) +#' # elpd_loo_sub <- loo(log_lik_matrix[,idx]) +#' # sum(elpd_loo_sub$pointwise[,"elpd_loo"]) / Nsub * N +#' # +#' # # Use difference estimator to combine fast result and subsampled accurate result +#' # srs_diff_est(lpd$pointwise[,"elpd"], elpd_loo_sub$pointwise[,"elpd_loo"], idx) +#' # +#' # # Comparison to using PSIS-LOO for all observations +#' # loo(log_lik_matrix) #' @export srs_diff_est <- function(y_approx, y, y_idx) { checkmate::assert_numeric(y_approx) diff --git a/man/srs_diff_est.Rd b/man/srs_diff_est.Rd index c481d095..72f8faf5 100644 --- a/man/srs_diff_est.Rd +++ b/man/srs_diff_est.Rd @@ -38,9 +38,9 @@ a proportional difference of \eqn{1/n} compared to the paper. } \examples{ ### This example predicts wine quality (data from Cortez et al., 2009). -## The following, commented out code shows how you to fit the model -## to generate the wine_log_lik_matrix. -## The matrix is part of loo as wine_log_lik_matrix() +## The code is commented out for easier installation of the package +## because brm() takes two or three seconds to fit. +## A log_lik_matrix is generated from a fit, then it is used for srs_diff_est(). 
# library(dplyr) # library(brms) # options(brms.backend = "cmdstanr") @@ -60,10 +60,25 @@ a proportional difference of \eqn{1/n} compared to the paper. 
# silent = 2, # refresh = 0) # -# wine_log_lik_matrix <- log_lik(fitos) -wine_log_lik_matrix <- "hi" - - +# log_lik_matrix <- log_lik(fitos) +# +# N <- nrow(wine_scaled) +# Nsub <- 100 +# +# # posterior log-score +# lpd <- elpd(log_lik_matrix) +# sum(lpd$pointwise[,"elpd"]) +# # Use PSIS-LOO for subsample of Nsub randomly selected observations +# set.seed(1) +# idx <- sample(1:N, Nsub) +# elpd_loo_sub <- loo(log_lik_matrix[,idx]) +# sum(elpd_loo_sub$pointwise[,"elpd_loo"]) / Nsub * N +# +# # Use difference estimator to combine fast result and subsampled accurate result +# srs_diff_est(lpd$pointwise[,"elpd"], elpd_loo_sub$pointwise[,"elpd_loo"], idx) +# +# # Comparison to using PSIS-LOO for all observations +# loo(log_lik_matrix) } \references{ Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020).