From 0a5ac76cb1bd29acc26a977d934e50de67c0b994 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 13:52:27 +0100
Subject: [PATCH 1/7] add helper functions for peak size new infections

---
 episodes/simulating-transmission.Rmd | 63 ++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/episodes/simulating-transmission.Rmd b/episodes/simulating-transmission.Rmd
index 2763457a..ec58c761 100644
--- a/episodes/simulating-transmission.Rmd
+++ b/episodes/simulating-transmission.Rmd
@@ -454,7 +454,8 @@ output %>%
     aes(
       x = time,
       y = value,
-      colour = demography_group
+      color = demography_group,
+      linetype = compartment
     )
   ) +
   scale_y_continuous(
@@ -472,22 +473,66 @@ output %>%
 ::::::::::::::::::::::::::::::::::::: callout
 ### Time increments
 
-Note that there is a default argument of `increment = 1`. This relates to the time step of the ODE solver. When the parameters are specified on a daily time-scale and maximum number of time steps (`time_end`) is days, the default time step of the ODE solver one day. 
+Note that there is a default argument of `increment = 1`. This relates to the time step of the ODE solver. When the parameters are specified on a daily time scale and the maximum number of time steps (`time_end`) is days, the default time step of the ODE solver is one day. 
 
-The choice of increment will depend on the time scale of the parameters, and the rate at which events can occur. In general, the increment should be smaller than the fastest event that can occur. For example: 
+The choice of increment will depend on the time scale of the parameters and the rate at which events can occur. In general, the increment should be smaller than the fastest event that can occur. For example: 
 
-- if parameters are on a daily time scale, and all events are reported on a daily basis, then the increment should be equal to one day;
-- if parameters are on a monthly time scale, but some events will occur within a month, then the increment should be less than one month.
+- If parameters are on a *daily* time scale, and all events are reported on a daily basis, then the increment should be equal to one day;
+- If parameters are on a *monthly* time scale, but some events will occur within a month, then the increment should be less than one month.
 
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
+:::::::::::::::: testimonial
+
+**Two helper functions in `{epidemics}`**
+
+Use `epidemics::epidemic_peak()` to get the time and size of a compartment's highest peak for all demographic groups. By default, this will calculate for the infectious compartment.
+
+```{r}
+epidemics::epidemic_peak(data = output)
+```
+
+Use `epidemics::epidemic_size()` to get the size of the epidemic at any stage between the start and the end. This is calculated as the number of individuals *recovered* from infection at that stage of the epidemic.
+
+```{r}
+epidemics::epidemic_size(data = output)
+```
+
+These summary functions can help you get outputs relevant to scenario comparisons or any other downstream analysis.
+
+::::::::::::::::
+
+The figure above shows the total number or cumulative amount of individuals in the infectious compartment at each time.
+If you want to show the *total burden* of the disease, the `infectious` compartment is the most appropriate.
+On the other hand, if you want to show the *daily burden*, then you could use `epidemics::new_infections()` to get the daily incidence.
+
+::::::::::::::::::::::: spoiler
+
+Notice that the number of new infected individuals at each time (as in the figure below) is lower than the cumulative number of infectious individuals at each time (as in the figure above).
+
+```{r}
+# New infections
+newinfections_bygroup <- epidemics::new_infections(data = output)
+
+# Visualise the spread of the epidemic in terms of new infections
+newinfections_bygroup %>%
+  ggplot(aes(time, new_infections, colour = demography_group)) +
+  geom_line() +
+  scale_y_continuous(
+    breaks = scales::breaks_pretty(n = 10),
+    labels = scales::comma
+  )
+```
+
+:::::::::::::::::::::::
+
 ## Accounting for uncertainty
 
 The epidemic model is [deterministic](../learners/reference.md#deterministic), which means it runs like clockwork: the same parameters will always lead to the same trajectory. A deterministic model is one where the outcome is completely determined by the initial conditions and parameters, with no random variation. However, reality is not so predictable. There are two main reasons for this: the transmission process can involve randomness, and we may not know the exact epidemiological characteristics of the pathogen we're interested in. In the next episode, we will consider 'stochastic' models (i.e. models where we can define the process that creates randomness in transmission). In the meantime, we can include uncertainty in the value of the parameters that go into the deterministic model. To account for this, we must run our model for different parameter combinations. 
 
-We ran our model with $R_0= 1.5$. However, we believe that $R_0$ follows a normal distribution with mean 1.5 and standard deviation 0.05. To account for uncertainty we will run the model for different values of $R_0$. The steps we will follow to do this are:
+We ran our model with $R_0= 1.5$. However, we believe that $R_0$ follows a normal distribution with mean 1.5 and standard deviation 0.05. To account for uncertainty, we will run the model for different values of $R_0$. The steps we will follow to do this are:
 
-1. Obtain 100 samples from the from a normal distribution
+1. Obtain 100 samples from a normal distribution
 
 ```{r normal, echo = TRUE}
 # specify the mean and standard deviation of R0
@@ -518,7 +563,7 @@ output_samples <- model_default(
 )
 ```
 
-3. Calculate the mean and 95% quantiles of number of infectious individuals across each model simulation and visualise output
+3. Calculate the mean and 95% quantiles of the number of infectious individuals across each model simulation and visualise the output
 
 ```{r  plot, fig.width = 10}
 output_samples %>%
@@ -548,7 +593,7 @@ output_samples %>%
 ```
 
 
-Deciding which parameters to include uncertainty in depends on a few factors: how well informed a parameter value is e.g. consistency of estimates from the literature; how sensitive model outputs are to parameter value changes; and the purpose of the modelling task. See [McCabe et al. 2021](https://doi.org/10.1016%2Fj.epidem.2021.100520) to learn about different types of uncertainty in infectious disease modelling.
+Deciding which parameters to include uncertainty in depends on a few factors: how well informed a parameter value is, e.g. consistency of estimates from the literature; how sensitive model outputs are to parameter value changes; and the purpose of the modelling task. See [McCabe et al. 2021](https://doi.org/10.1016%2Fj.epidem.2021.100520) to learn about different types of uncertainty in infectious disease modelling.
 
 :::::::::::::::::: challenge
 

From fa3de066096bb5be37d5f1be8690348685d242f5 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:03:22 +0100
Subject: [PATCH 2/7] sync format with previous figure

---
 episodes/simulating-transmission.Rmd | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/episodes/simulating-transmission.Rmd b/episodes/simulating-transmission.Rmd
index ec58c761..75c476c5 100644
--- a/episodes/simulating-transmission.Rmd
+++ b/episodes/simulating-transmission.Rmd
@@ -516,12 +516,13 @@ newinfections_bygroup <- epidemics::new_infections(data = output)
 
 # Visualise the spread of the epidemic in terms of new infections
 newinfections_bygroup %>%
-  ggplot(aes(time, new_infections, colour = demography_group)) +
+  ggplot(aes(x = time, y = new_infections, colour = demography_group)) +
   geom_line() +
   scale_y_continuous(
-    breaks = scales::breaks_pretty(n = 10),
+    breaks = scales::breaks_pretty(n = 5),
     labels = scales::comma
-  )
+  ) +
+  theme_bw()
 ```
 
 :::::::::::::::::::::::

From 650d4f9d7170f138805f09c026ee75055e8b5914 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:30:38 +0100
Subject: [PATCH 3/7] add new infections argument when vaccination

---
 episodes/modelling-interventions.Rmd | 43 ++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/episodes/modelling-interventions.Rmd b/episodes/modelling-interventions.Rmd
index a3e4691d..f6389618 100644
--- a/episodes/modelling-interventions.Rmd
+++ b/episodes/modelling-interventions.Rmd
@@ -378,7 +378,7 @@ vaccinate <- vaccination(
 )
 ```
 
-We pass our vaccination object into the model using argument `vaccination = vaccinate`:
+We pass our vaccination object into the model using the argument `vaccination = vaccinate`:
 
 ```{r output_vaccinate}
 output_vaccinate <- model_default(
@@ -434,12 +434,51 @@ output %>%
   )
 ```
 
-From the plot we see that the peak number of total number of infectious individuals when vaccination is in place is much lower compared to school closures and mask wearing interventions. 
+From the plot, we see that the peak number of total number of infectious individuals when vaccination is in place is much lower compared to school closures and mask-wearing interventions. 
 
 :::::::::::::::::::::::::::::::::
 ::::::::::::::::::::::::::::::::::::::::::::::::
 
+Lastly, if you are interested in plotting the trajectory of new infections from an `epidemics::model_default()` with a `vaccination` intervention, you need to specify one additional argument.
+At `epidemics::new_infections()` specify `compartments_from_susceptible = "vaccinated"` to name the model compartments into which individuals transition from the "susceptible" compartment, and which are not related to infection. In the scenario above,  "vaccinated" individuals who are no longer susceptible should not be counted as infected.
 
+::::::::::::::::::::: spoiler
+
+Notice that if we use `by_group = FALSE` in `epidemics::new_infections()`, we get a summary of the new infections in all demographic groups.
+Try keeping the default `by_group = TRUE` and adding `linetype = demography_group` when declaring variables in `ggplot(aes(...))`.
+
+```{r}
+infections_baseline <- epidemics::new_infections(
+  data = output_baseline,
+  compartments_from_susceptible = "vaccinated", # if vaccination
+  by_group = FALSE
+)
+
+infections_intervention <- epidemics::new_infections(
+  data = output_vaccinate,
+  compartments_from_susceptible = "vaccinated", # if vaccination
+  by_group = FALSE
+)
+
+# Assign scenario names
+infections_baseline$scenario <- "Baseline"
+infections_intervention$scenario <- "Vaccination"
+
+# Combine the data from both scenarios
+infections_baseline_intervention <- dplyr::bind_rows(infections_baseline, infections_intervention)
+
+infections_baseline_intervention %>%
+  ggplot(aes(x = time, y = new_infections, colour = scenario)) +
+  geom_line() +
+  geom_vline(
+    xintercept = c(vaccinate$time_begin, vaccinate$time_end),
+    linetype = "dashed",
+    linewidth = 0.2
+  ) +
+  scale_y_continuous(labels = scales::comma)
+```
+
+:::::::::::::::::::::
 
 
 ## Summary

From 26e7de4001f60cb034c037b14e792c896b35bcd3 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:36:24 +0100
Subject: [PATCH 4/7] fix format and lintr

---
 episodes/modelling-interventions.Rmd | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/episodes/modelling-interventions.Rmd b/episodes/modelling-interventions.Rmd
index f6389618..ca95ed1a 100644
--- a/episodes/modelling-interventions.Rmd
+++ b/episodes/modelling-interventions.Rmd
@@ -465,7 +465,10 @@ infections_baseline$scenario <- "Baseline"
 infections_intervention$scenario <- "Vaccination"
 
 # Combine the data from both scenarios
-infections_baseline_intervention <- dplyr::bind_rows(infections_baseline, infections_intervention)
+infections_baseline_intervention <- dplyr::bind_rows(
+  infections_baseline,
+  infections_intervention
+)
 
 infections_baseline_intervention %>%
   ggplot(aes(x = time, y = new_infections, colour = scenario)) +
@@ -475,7 +478,8 @@ infections_baseline_intervention %>%
     linetype = "dashed",
     linewidth = 0.2
   ) +
-  scale_y_continuous(labels = scales::comma)
+  scale_y_continuous(labels = scales::comma) +
+  theme_bw()
 ```
 
 :::::::::::::::::::::

From d5be597be22c94050c5c0a4db0d1d0a370852062 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:38:43 +0100
Subject: [PATCH 5/7] add rationale of using by_group

---
 episodes/modelling-interventions.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/episodes/modelling-interventions.Rmd b/episodes/modelling-interventions.Rmd
index ca95ed1a..fb916e5c 100644
--- a/episodes/modelling-interventions.Rmd
+++ b/episodes/modelling-interventions.Rmd
@@ -445,7 +445,7 @@ At `epidemics::new_infections()` specify `compartments_from_susceptible = "vacci
 ::::::::::::::::::::: spoiler
 
 Notice that if we use `by_group = FALSE` in `epidemics::new_infections()`, we get a summary of the new infections in all demographic groups.
-Try keeping the default `by_group = TRUE` and adding `linetype = demography_group` when declaring variables in `ggplot(aes(...))`.
+To get an age-stratified plot, keep the default `by_group = TRUE` and then add `linetype = demography_group` when declaring variables in `ggplot(aes(...))`.
 
 ```{r}
 infections_baseline <- epidemics::new_infections(

From 6eac973d22708a2fcc9a21060e25004738debcff Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:44:20 +0100
Subject: [PATCH 6/7] test lintr

---
 episodes/modelling-interventions.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/episodes/modelling-interventions.Rmd b/episodes/modelling-interventions.Rmd
index fb916e5c..67ef8488 100644
--- a/episodes/modelling-interventions.Rmd
+++ b/episodes/modelling-interventions.Rmd
@@ -464,7 +464,7 @@ infections_intervention <- epidemics::new_infections(
 infections_baseline$scenario <- "Baseline"
 infections_intervention$scenario <- "Vaccination"
 
-# Combine the data from both scenarios
+# Combine data from both scenarios
 infections_baseline_intervention <- dplyr::bind_rows(
   infections_baseline,
   infections_intervention

From 50bb75c022e76ef1221850ec04531869fe2ff5b3 Mon Sep 17 00:00:00 2001
From: Andree Valle Campos <avallecam@gmail.com>
Date: Thu, 24 Jul 2025 14:47:09 +0100
Subject: [PATCH 7/7] fix lintr due to variable name

---
 episodes/modelling-interventions.Rmd | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/episodes/modelling-interventions.Rmd b/episodes/modelling-interventions.Rmd
index 67ef8488..bebaf734 100644
--- a/episodes/modelling-interventions.Rmd
+++ b/episodes/modelling-interventions.Rmd
@@ -464,13 +464,13 @@ infections_intervention <- epidemics::new_infections(
 infections_baseline$scenario <- "Baseline"
 infections_intervention$scenario <- "Vaccination"
 
-# Combine data from both scenarios
-infections_baseline_intervention <- dplyr::bind_rows(
+# Combine the data from both scenarios
+infections_baseline_interv <- dplyr::bind_rows(
   infections_baseline,
   infections_intervention
 )
 
-infections_baseline_intervention %>%
+infections_baseline_interv %>%
   ggplot(aes(x = time, y = new_infections, colour = scenario)) +
   geom_line() +
   geom_vline(