Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 14 additions & 41 deletions model-2pp/model-2pp.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ sample_sizes <- ozpolls_2016 %>%
sample_sizes <- sample_sizes %>%
mutate(ss = ifelse(is.na(ss), min(sample_sizes$ss, na.rm = TRUE), ss))

all_firms <- ozpolls %>%
all_firms_index <- ozpolls %>%
filter(!firm %in% c("Election result") &
firm %in% firms_today) %>%
pull(firm) %>%
unique() %>%
as.character()
select(firm) %>%
distinct() %>%
mutate(firm_idx = as.numeric(factor(firm)))

# 2 party preferred vote - both elections and polls:
data_2pp <- ozpolls %>%
Expand Down Expand Up @@ -64,10 +64,10 @@ elections_2pp <- data_2pp %>%
election_days <- c(pull(elections_2pp, n_days), next_election - first_election)
election_results <- pull(elections_2pp, intended_vote)

# Data for one pollster as a time, elements in a list for ease of use:
one_pollster <- lapply(all_firms, function(x){
filter(data_2pp, firm == x)
})
# Polling data
polls_2pp <- data_2pp %>%
left_join(all_firms_index, by = "firm") %>%
filter(!is.na(firm_idx))

# Put all the data into a single list we can pass to Stan:
model_data <- list(
Expand All @@ -78,40 +78,13 @@ model_data <- list(
# This inflator doubles the variance of polls; ie allows for non-sampling error
inflator =sqrt(2),

y1_values = one_pollster[[1]]$intended_vote,
y1_days = one_pollster[[1]]$n_days,
y1_n = nrow(one_pollster[[1]]),
y1_se = one_pollster[[1]]$se[1],

y2_values = one_pollster[[2]]$intended_vote,
y2_days = one_pollster[[2]]$n_days,
y2_n = nrow(one_pollster[[2]]),
y2_se = one_pollster[[2]]$se[1],

y3_values = one_pollster[[3]]$intended_vote,
y3_days = one_pollster[[3]]$n_days,
y3_n = nrow(one_pollster[[3]]),
y3_se = one_pollster[[3]]$se[1],

y4_values = one_pollster[[4]]$intended_vote,
y4_days = one_pollster[[4]]$n_days,
y4_n = nrow(one_pollster[[4]]),
y4_se = one_pollster[[4]]$se[1],

y5_values = one_pollster[[5]]$intended_vote,
y5_days = one_pollster[[5]]$n_days,
y5_n = nrow(one_pollster[[5]]),
y5_se = one_pollster[[5]]$se[1],

y6_values = one_pollster[[6]]$intended_vote,
y6_days = one_pollster[[6]]$n_days,
y6_n = nrow(one_pollster[[6]]),
y6_se = one_pollster[[6]]$se[1],
n_polling_firms = nrow(all_firms_index),

y7_values = one_pollster[[7]]$intended_vote,
y7_days = one_pollster[[7]]$n_days,
y7_n = nrow(one_pollster[[7]]),
y7_se = one_pollster[[7]]$se[1]
polls_n = nrow(polls_2pp),
polls_firm_idx = polls_2pp$firm_idx,
polls_intended_vote = polls_2pp$intended_vote,
polls_se = polls_2pp$se,
polls_day = polls_2pp$n_days

)

Expand Down
55 changes: 11 additions & 44 deletions model-2pp/model-2pp.stan
Original file line number Diff line number Diff line change
Expand Up @@ -7,48 +7,19 @@ data {
real election_results[number_elections - 1]; // historical election results
real inflator; // amount by which to multiply the standard error of polls

// note - pollsters are individually hard coded in to avoid having to use some kind of ragged array:
int n_polling_firms; // number of polling firms

int y1_n; // number of polls conducted by pollster 1
real y1_values[y1_n]; // actual values in polls for pollster 1
int y1_days[y1_n]; // the number of days since first election each poll was taken
real y1_se; // the standard error for each party from pollster 1 (note sometimes made up)

int y2_n;
real y2_values[y2_n];
int y2_days[y2_n];
real y2_se;

int y3_n;
real y3_values[y3_n];
int y3_days[y3_n];
real y3_se;

int y4_n;
real y4_values[y4_n];
int y4_days[y4_n];
real y4_se;

int y5_n;
real y5_values[y5_n];
int y5_days[y5_n];
real y5_se;

int y6_n;
real y6_values[y6_n];
int y6_days[y6_n];
real y6_se;

int y7_n;
real y7_values[y7_n];
int y7_days[y7_n];
real y7_se;
int polls_n; // number of polls
int polls_firm_idx[polls_n]; // index of firm
real polls_intended_vote[polls_n]; // actual values in polls
real polls_se[polls_n]; // the standard error for each party (note sometimes made up)
int polls_day[polls_n]; // the number of days since first election each poll was taken

}

parameters {
vector[election_days[number_elections]] mu; //
real d[7]; // polling effects
real d[n_polling_firms]; // polling effects
real<lower=0> sigma; // sd of innovations
}

Expand All @@ -71,13 +42,9 @@ model {


// 2. Polls
y1_values ~ normal(mu[y1_days] + d[1], y1_se * inflator);
y2_values ~ normal(mu[y2_days] + d[2], y2_se * inflator);
y3_values ~ normal(mu[y3_days] + d[3], y3_se * inflator);
y4_values ~ normal(mu[y4_days] + d[4], y4_se * inflator);
y5_values ~ normal(mu[y5_days] + d[5], y5_se * inflator);
y6_values ~ normal(mu[y6_days] + d[6], y6_se * inflator);
y7_values ~ normal(mu[y7_days] + d[7], y7_se * inflator);

for(ii in 1:polls_n){
polls_intended_vote[ii] ~ normal(mu[polls_day[ii]] + d[polls_firm_idx[ii]], polls_se[ii] * inflator);
}

}

7 changes: 5 additions & 2 deletions model-2pp/model-interpretation.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ load("output/model_2pp_2019051801014.rdata")
# Polling firms

d <- as.data.frame(extract(model_2pp, "d")$d)
names(d) <- all_firms

pd <- d %>%
gather(firm, overestimate) %>%
gather(firm_idx, overestimate) %>%
mutate(firm_idx = as.numeric(str_replace_all(firm_idx, "V", ""))) %>%
left_join(all_firms_index,
by = "firm_idx") %>%
mutate(firm = fct_reorder(firm, overestimate)) %>%
ggplot(aes(x = overestimate, colour = firm, fill = firm)) +
# facet_wrap(~firm) +
Expand Down