Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions R/analysis-descriptive.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ table_confint <- function(scores, group_var = NULL) {
) |>
mutate(
Models = paste0(n_models, " (", p_models, "%)"),
Forecasts = paste0(n_forecasts, " (", p_forecasts, "%)"),
"Mean WIS (SD)" = paste0(round(mean, 2), " (", round(sd, 2), ")")
)

Expand Down Expand Up @@ -99,7 +98,6 @@ print_table1 <- function(scores) {
select(
Variable,
starts_with("Models_"),
starts_with("Forecasts_"),
starts_with("Mean WIS (SD)_")
)
## reorder
Expand All @@ -122,7 +120,8 @@ print_table1 <- function(scores) {
"the European COVID-19 Forecast Hub, March 2021-2023. ",
"Forecast performance was measured using the weighted ",
"interval score (WIS), with a lower score indicating a more ",
"accurate forecast."
"accurate forecast. ",
"Numbers in brackets denote standard deviations."
),
col.names = str_remove(colnames(table1), "_.*$"),
align = c("l", rep("r", ncol(table1) - 1))
Expand Down Expand Up @@ -164,8 +163,8 @@ plot_over_time <- function(scores, ensemble, add_plot, show_uncertainty = TRUE)
scale_x_date(date_labels = "%b %Y") +
scale_fill_manual(
values = c(
"Single-country" = "#e7298a",
"Multi-country" = "#e6ab02"
"Single-country" = "#1b9e77",
"Multi-country" = "#d95f02"
),
aesthetics = c("col", "fill")
) +
Expand Down Expand Up @@ -201,7 +200,7 @@ plot_over_time <- function(scores, ensemble, add_plot, show_uncertainty = TRUE)
scale_x_date(date_labels = "%b %Y") +
scale_fill_brewer(
aesthetics = c("col", "fill"),
type = "qual", palette = 2
type = "qual", palette = "Set2"
) +
labs(
x = NULL, y = "Mean WIS (log scale)",
Expand Down
4 changes: 4 additions & 0 deletions R/plot-model-results.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ plot_models <- function(random_effects, scores, x_labels = TRUE,
position = position_dodge(width=1)) +
geom_hline(yintercept = 0, lty = 2) +
labs(y = "Partial effect", x = "", colour = NULL, shape = NULL) +
scale_shape_manual(
values = c("Single-country" = 16, "Multi-country" = 17),
drop = FALSE
) +
scale_alpha_manual(values = c("Adjusted" = 1, "Unadjusted" = 0.3)) +
scale_colour_brewer(type = "qual", palette = 2) +
facet_wrap(~epi_target, scales = "free_y", drop = TRUE) +
Expand Down
31 changes: 25 additions & 6 deletions report/results.qmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: "Results"
output: html_document
format: html
---
<!--- RMarkdown set up --->
```{r set-up, include=FALSE}
Expand Down Expand Up @@ -117,20 +117,23 @@ Only `r nrow(multi_targets |> filter(consistent))` models consistently forecast
We explored the interval score (WIS) as a measure of predictive performance (Figure \@ref(fig:scores-over-time)), and characterised its association with model structure and number of countries targeted.
We used descriptive statistics and an unadjusted univariate model for each explanatory variable.

```{r scores-over-time, fig.height=8,fig.width=6,fig.cap=scores_over_time_cap}
scores_over_time_cap <- "Predictive accuracy of multiple models' forecasts for COVID-19 cases and deaths across 32 European countries over time. Forecast performance is shown as the mean weighted interval score (WIS), where a lower score indicates better performance. Forecast performance is summarised across 32 target locations and 1 through 4 week forecast horizons, with varying numbers of forecasters participating over time. Shown for (A) the method structure used by each model; (B) the number of countries each model targeted (one or multiple); with (C) the total count of observed incidence across all 32 countries, shown on the log scale."
```{r scores-over-time, fig.height=6,fig.width=6,fig.cap=scores_over_time_cap}
scores_over_time_cap <- "Predictive accuracy of multiple models' forecasts for COVID-19 cases and deaths across 32 European countries over time. Forecast performance is shown as the unadjusted mean weighted interval score (WIS), where a lower score indicates better performance. Forecast performance is summarised across 32 target locations and 1 through 4 week forecast horizons, with varying numbers of forecasters participating over time. Shown for (A) the method structure used by each model; (B) the number of countries each model targeted (one or multiple). Unadjusted means are shown; see Figure 2 for adjusted estimates accounting for confounding factors."
scores_over_time <- plot_over_time(
scores = scores,
ensemble = ensemble,
add_plot = data_plot(scores, log = TRUE),
show_uncertainty = FALSE
)
scores_over_time
```

```{r}
results <- readRDS(here("output", "results.rds"))
table_effects <- results$effects |>
# Handle column name difference between saved results and current code
if ("outcome_target" %in% names(results$effects)) {
results$effects <- rename(results$effects, epi_target = outcome_target)
}
table_effects <- results$effects |>
mutate(upper_97.5_text = if_else(
upper_97.5 < 0,
paste0("(", round(upper_97.5, 2), ")"),
Expand Down Expand Up @@ -198,7 +201,7 @@ plot_effects(results$effects, variables = c("Method", "CountryTargets"))


```{r plot-models, fig.height=8, fig.width=10, fig.cap=model_cap}
model_cap <- "Partial effect size (95% CI) by model. This can be interpreted as adjusted performance after accounting for all other variables in the model, with remaining differences in effects as seen here representing unexplained variation between models beyond these factors."
model_cap <- "Partial effect size (95% CI) by individual model, shown by method classification (colour) and number of countries targeted (shape: circle = single-country, triangle = multi-country). Solid lines show adjusted estimates; faded lines show unadjusted estimates. This can be interpreted as adjusted performance after accounting for all other variables in the model, with remaining differences in effects as seen here representing unexplained variation between models beyond these factors."
plot_models(results$effects, scores)
```

Expand All @@ -209,3 +212,19 @@ In developing this work, we explored several alternative model specifications. W
We considered different ways of categorising explanatory variables,

and sensitivity analyses (see Supplementary Results).

```{r export-figures, eval=FALSE, echo=FALSE}
# Export figures in TIF format for journal submission (PLOS Comp Bio)
# Uncomment and run manually to generate submission-ready figures
# ggsave(here("output", "plots", "Figure1_scores_over_time.tif"),
# scores_over_time, width = 6, height = 6, dpi = 300,
# device = "tiff", compression = "lzw")
# ggsave(here("output", "plots", "Figure2_partial_effects.tif"),
# plot_effects(results$effects, variables = c("Method", "CountryTargets")),
# width = 5, height = 3, dpi = 300,
# device = "tiff", compression = "lzw")
# ggsave(here("output", "plots", "Figure3_model_effects.tif"),
# plot_models(results$effects, scores),
# width = 10, height = 8, dpi = 300,
# device = "tiff", compression = "lzw")
```
Loading