Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R/analysis-model.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ library(purrr)
library(mgcv)
library(gammit)
library(gratia)
library(ggplot2)
source(here("R", "process-data.R"))

model_wis <- function(scoring_scale = "log", output_dir = "output") {
Expand Down
91 changes: 53 additions & 38 deletions R/utils-variants.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ classify_variant_phases <- function() {
# set a complete grid of dates and locations in ecdc year-weeks
date_grid <- tibble(
target_end_date = seq.Date(from = as.Date("2020-01-04"),
to = Sys.Date(), by = 7),
to = as.Date("2023-03-17"), by = 7),
year = isoyear(target_end_date),
week = isoweek(target_end_date)) |>
group_by(year, week) |>
Expand Down Expand Up @@ -102,53 +102,65 @@ classify_variant_phases <- function() {

# Identify dominant variant in each week and location
set_variant_phases <- function(variant_data, date_location) {
# Aggregate variant percentages: first sum sub-variants within each source
# (e.g., BA.4 + BA.5 → Omicron-BA.4/5), then average across sources
dominant <- variant_data |>
group_by(location, target_end_date, source) |>
filter(!is.na(variant_percent)) |>
group_by(location, target_end_date, source, variant_name) |>
summarise(variant_percent = sum(variant_percent), .groups = "drop") |>
group_by(location, target_end_date, variant_name) |>
summarise(variant_percent = mean(variant_percent), .groups = "drop") |>
group_by(location, target_end_date) |>
slice_max(order_by = variant_percent, with_ties = FALSE, n = 1) |>
mutate(variant_name = as.character(variant_name)) |>
ungroup()

# # use single sequential phases for each location -------
# dominant_phases <- dominant |>
# mutate(dominant_name = ifelse(variant_name == "Other", NA,
# variant_name)) |>
# filter(!is.na(dominant_name)) |>
# # get only one first date for each dominant variant in each location
# group_by(location) |>
# arrange(target_end_date) |>
# group_by(dominant_name, .add = TRUE) |>
# summarise(target_end_date = first(target_end_date), .groups = "drop") |>
# # expand out to all weeks
# right_join(date_location,
# by = c("location", "target_end_date")) |>
# group_by(location) |>
# arrange(target_end_date) |>
# fill(dominant_name, .direction = "downup") |>
# mutate(VariantPhase = factor(dominant_name)) |>
# select(location, target_end_date, VariantPhase)

# average date ------------------------------------------------------------
# TODO REMOVE - HACK
# get median average first date for each dominant variant across all locations
dominant_start_mid <- dominant |>
# Expected chronological order of variant phases
variant_order <- c("Alpha", "Delta", "Omicron-BA.1", "Omicron-BA.2",
"Omicron-BA.4/5", "Omicron-BQ/XBB")

# Use single sequential phases for each location
# Find first date each named variant was dominant (>50%) in each location
phase_starts <- dominant |>
mutate(dominant_name = ifelse(variant_name == "Other", NA,
variant_name)) |>
filter(!is.na(dominant_name)) |>
filter(!is.na(dominant_name), variant_percent > 50) |>
group_by(location) |>
arrange(target_end_date) |>
group_by(dominant_name, .add = TRUE) |>
summarise(target_end_date = first(target_end_date), .groups = "drop") |>
group_by(dominant_name) |>
summarise(target_end_date = median(target_end_date))
dominant_mid <- date_location |>
left_join(dominant_start_mid, by = "target_end_date") |>
# Enforce chronological ordering: remove out-of-sequence phases
mutate(variant_rank = match(dominant_name, variant_order)) |>
filter(!is.na(variant_rank)) |>
group_by(location) |>
arrange(target_end_date) |>
filter(variant_rank == cummax(variant_rank)) |>
select(-variant_rank)

# Manual override: Hungary had sparse ECDC surveillance data in early 2021,
# causing Delta to backfill to the start of the timeseries. Alpha was dominant
# before Delta spread from late July 2021.
# Source: https://abouthungary.hu/news-in-brief/delta-and-gamma-variant-identified-in-hungary
hu_alpha_start <- min(date_location$target_end_date)
hu_delta_start <- as.Date("2021-07-31") # week following 23 July 2021
phase_starts <- phase_starts |>
# Remove any data-derived Alpha/Delta starts for HU
filter(!(location == "HU" & dominant_name %in% c("Alpha", "Delta"))) |>
bind_rows(tibble(
location = "HU",
dominant_name = c("Alpha", "Delta"),
target_end_date = c(hu_alpha_start, hu_delta_start)
))

# Expand out to all weeks
dominant_phases <- phase_starts |>
right_join(date_location,
by = c("location", "target_end_date")) |>
group_by(location) |>
arrange(target_end_date) |>
fill(dominant_name, .direction = "downup") |>
mutate(VariantPhase = factor(dominant_name)) |>
select(location, target_end_date, VariantPhase)
dominant_phases <- dominant_mid
# TODO end ---------------------------------------------------------

return(dominant_phases)

Expand All @@ -164,7 +176,8 @@ get_variants_ch <- function(date_grid, variant_names) {
select(variant = variant_type,
target_end_date = date,
variant_percent = prct_mean7d) |>
filter(target_end_date %in% date_grid$target_end_date)
filter(target_end_date %in% date_grid$target_end_date) |>
mutate(source = "CH-WGS")

# 2022-2023: weekly data from hospital sampling
# https://covid19.admin.ch/api/data/20231206-0sxi4s4a/sources/COVID19Variants_hosp_w.csv
Expand All @@ -173,13 +186,15 @@ get_variants_ch <- function(date_grid, variant_names) {
select(variant = variant_type,
year_week,
variant_percent = prct) |>
left_join(date_grid, by = c("year_week")) |>
filter(year_week %in% date_grid$year_week &
!target_end_date %in% variants_ch_wgs$target_end_date)
left_join(distinct(date_grid, year_week, .keep_all = TRUE),
by = c("year_week")) |>
filter(year_week %in% date_grid$year_week) |>
mutate(source = "CH-Hosp")

# Keep both sources; overlap is handled by set_variant_phases() which
# sums sub-variants within each source then averages across sources
ch <- bind_rows(variants_ch_wgs, variants_ch_hosp) |>
mutate(location = "CH",
source = "National") |>
mutate(location = "CH") |>
filter(variant != "all_sequenced") |>
mutate(variant_name = as.character(fct_recode(variant, !!!variant_names))) |>
select(location, source, target_end_date, variant_name, variant_percent)
Expand Down
Binary file modified output/plots/check_Cases.pdf
Binary file not shown.
Binary file modified output/plots/check_Deaths.pdf
Binary file not shown.
Binary file modified output/results.rds
Binary file not shown.
2 changes: 1 addition & 1 deletion report/Revision_manuscript.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ We also classified models by geographic target specificity. We assessed the tota

*Confounding factors associated with epidemiological context*

We considered aspects of the forecasting context that we believed could influence associations with forecast performance beyond the effects of interest. Among models, forecast performance typically declines with longer forecast horizons. We also considered the wider epidemiological setting of the forecast target. This included the trend of incidence in the target week for each location, which we categorised as “Stable”, “Decreasing”, or “Increasing”, based on the difference over a three-week moving average of incidence (with a change of \+/-5% as “Stable”; see Supplement). We also considered differing transmission characteristics with each Sars-Cov2 variant. We used publicly available sequence data to classify each target location-week according to the dominant circulating variant, creating a sequence of phases (see Supplement) (\#ref-variant-data). We considered extraneous variation at the country level, to account for, for example, effects of population size or different transmission probabilities with national policy.
We considered aspects of the forecasting context that we believed could influence associations with forecast performance beyond the effects of interest. Among models, forecast performance typically declines with longer forecast horizons. We also considered the wider epidemiological setting of the forecast target. This included the trend of incidence in the target week for each location, which we categorised as “Stable”, “Decreasing”, or “Increasing”, based on the difference over a three-week moving average of incidence (with a change of \+/-5% as “Stable”; see Supplement). We also considered differing transmission characteristics with each SARS-CoV-2 variant. We used publicly available genomic surveillance data from ECDC, UKHSA, and the Swiss Federal Office of Public Health to classify each target location-week according to the dominant circulating variant, assigning sequential variant phases (Alpha, Delta, Omicron BA.1, BA.2, BA.4/5, BQ/XBB) separately for each country based on when each variant first exceeded 50% of sequenced samples (see Supplement for details). We considered extraneous variation at the country level, to account for, for example, effects of population size or different transmission probabilities with national policy.

**Analysis**

Expand Down
4 changes: 2 additions & 2 deletions report/results.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ table_effects <- results$effects |>
paste0("(", round(upper_97.5, 2), ")"),
as.character(round(upper_97.5, 2))
)) |>
mutate(value_ci = paste0(round(value, 2),
" (", round(lower_2.5, 2), "-",
mutate(value_ci = paste0(round(value, 2),
" (", round(lower_2.5, 2), "-",
upper_97.5_text, ")"),
group = paste(epi_target, model, group, sep = "_")) |>
column_to_rownames("group")
Expand Down
4 changes: 3 additions & 1 deletion report/supplement/Supplement.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ scores |>

```{r variant-phases, fig.cap="Variant phases identified by dominant variant in each location and week"}
# see: R/utils-variants.R
variant_phases <- classify_variant_phases()
variant_phases <- classify_variant_phases()
variant_phases |>
ggplot(aes(x = target_end_date, y = location, fill = VariantPhase)) +
geom_tile() +
Expand All @@ -131,6 +131,8 @@ variant_phases |>
theme(legend.position = "bottom")
```

Genomic surveillance data were obtained from three sources: ECDC (covering 30 European countries), UKHSA (Great Britain), and the Swiss Federal Office of Public Health (Switzerland). Variant lineages were mapped to six named phases in expected chronological order: Alpha, Delta, Omicron-BA.1, Omicron-BA.2, Omicron-BA.4/5, and Omicron-BQ/XBB. For each country, we identified the first week in which each named variant exceeded 50% of sequenced samples. We enforced chronological ordering by removing any out-of-sequence phases, then expanded phase assignments to all weeks by filling forward and backward from observed transition dates. This per-location approach accounts for the fact that variant dominance dates differed substantially across European countries. Where genomic surveillance data were too sparse to identify a transition (Hungary), we supplemented with epidemiological reports to set the Alpha-to-Delta transition date.

\newpage

## Model fitting
Expand Down
Binary file modified report/supplement/Supplement.pdf
Binary file not shown.
Loading