|
| 1 | +#' Taking in the CDC NVSS Yearly life tables, extract and build the background mortality table expected by RESPOND |
| 2 | +#' |
| 3 | +#' Input: CDC NVSS Life Tables, output file name, stratifications |
| 4 | +#' Output: Resulting background mortality compatible with RESPOND |
| 5 | +#' |
| 6 | +#' @param files A set of files to extract the background mortality out of |
| 7 | +#' @param outputfile The name of the file to output the background mortality |
| 8 | +#' @param races A list of races |
| 9 | +#' @param sexes A list of sexes |
| 10 | +#' @param age_groups A list of age groups |
| 11 | +#' @param bin_size The size ages are grouped by |
| 12 | +#' |
| 13 | +#' @import data.table |
| 14 | +#' @importFrom utils write.csv |
| 15 | +#' @export |
| 16 | + |
| 17 | +build_background_mortality_file <- function( |
| 18 | + files, |
| 19 | + outputfile, |
| 20 | + races = c("black", "hispanic", "white"), |
| 21 | + sexes = c("female", "male"), |
| 22 | + age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100"), |
| 23 | + bin_size = 20 |
| 24 | +) { |
| 25 | + background_mortality <- lapply( |
| 26 | + files, |
| 27 | + extract_background_mortality, |
| 28 | + bin_size = bin_size, |
| 29 | + age_groups = age_groups |
| 30 | + ) |
| 31 | + result_table <- create_and_fill_table( |
| 32 | + background_mortality, |
| 33 | + races, |
| 34 | + sexes, |
| 35 | + age_groups |
| 36 | + ) |
| 37 | + if (!missing(outputfile)) { |
| 38 | + write.csv(result_table, outputfile, row.names = FALSE) |
| 39 | + } |
| 40 | + |
| 41 | + return(result_table) |
| 42 | +} |
| 43 | + |
| 44 | +#' Function used to extract background mortality values based on age from a single yearly CDC NVSS life table |
| 45 | +#' |
| 46 | +#' @param file_path The path to the CDC NVSS Life Table |
| 47 | +#' @param bin_size The size ages are grouped by |
| 48 | +#' @param age_groups A list of age groups |
| 49 | +#' |
| 50 | +#' @import data.table |
| 51 | +#' @importFrom readxl read_excel |
| 52 | +#' @keywords internal |
| 53 | + |
| 54 | +extract_background_mortality <- function( |
| 55 | + file_path, |
| 56 | + bin_size = 20, |
| 57 | + age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100") |
| 58 | +) { |
| 59 | + data <- readxl::read_excel(file_path, skip = 1) |
| 60 | + dt <- as.data.table(data)[(2:101)] |
| 61 | + |
| 62 | + # Rename columns to standard names |
| 63 | + setnames( |
| 64 | + dt, |
| 65 | + "Probability of dying between ages x and x + 1", |
| 66 | + "year_prob", |
| 67 | + skip_absent = TRUE |
| 68 | + ) |
| 69 | + setnames( |
| 70 | + dt, |
| 71 | + "Number dying between ages x and x + 1", |
| 72 | + "year_deaths", |
| 73 | + skip_absent = TRUE |
| 74 | + ) |
| 75 | + |
| 76 | + #Data table bindings |
| 77 | + year_prob <- year_deaths <- V1 <- NULL |
| 78 | + |
| 79 | + dt[, year_prob := as.numeric(year_prob)][, |
| 80 | + year_deaths := as.numeric(year_deaths) |
| 81 | + ] |
| 82 | + |
| 83 | + bin_groups <- (seq_len(nrow(dt)) - 1) %/% bin_size |
| 84 | + deaths_by_group <- dt[, sum(year_deaths), by = bin_groups][, V1] |
| 85 | + |
| 86 | + # 100k originates from the CDC NVSS data - reported in rates per 100,000 persons |
| 87 | + weekly_rates <- (deaths_by_group / 100000) / 52 |
| 88 | + weekly_probs <- 1 - exp(-weekly_rates) |
| 89 | + |
| 90 | + result <- data.table( |
| 91 | + agegrp = age_groups, |
| 92 | + weekly_probability = weekly_probs |
| 93 | + ) |
| 94 | + |
| 95 | + return(result) |
| 96 | +} |
| 97 | + |
| 98 | +#' Create and fill the table with mortality values for all demographic combinations |
| 99 | +#' |
| 100 | +#' @param background_mortality List of extracted background mortality data.tables |
| 101 | +#' @param races A list of races |
| 102 | +#' @param sexes A list of sexes |
| 103 | +#' @param age_groups A list of age groups |
| 104 | +#' |
| 105 | +#' @import data.table |
| 106 | +#' @keywords internal |
| 107 | + |
| 108 | +create_and_fill_table <- function( |
| 109 | + background_mortality, |
| 110 | + races = c("black", "hispanic", "white"), |
| 111 | + sexes = c("female", "male"), |
| 112 | + age_groups = c("1_20", "21_40", "41_60", "61_80", "81_100") |
| 113 | +) { |
| 114 | + #Data table bindings |
| 115 | + agegrp <- NULL |
| 116 | + |
| 117 | + combinations <- expand.grid( |
| 118 | + races = races, |
| 119 | + sexes = sexes, |
| 120 | + stringsAsFactors = FALSE |
| 121 | + ) |
| 122 | + combinations <- as.data.table(combinations) |
| 123 | + result_table <- combinations[rep( |
| 124 | + seq_len(nrow(combinations)), |
| 125 | + each = length(age_groups) |
| 126 | + )] |
| 127 | + result_table[, agegrp := rep(age_groups, times = nrow(combinations))] |
| 128 | + n_race_sex_combos <- length(races) * length(sexes) |
| 129 | + |
| 130 | + mortality_data <- data.table() |
| 131 | + for (i in seq_along(background_mortality)) { |
| 132 | + group_index <- ((i - 1) %% n_race_sex_combos) + 1 |
| 133 | + bg_mort <- background_mortality[[i]] |
| 134 | + demo_info <- combinations[group_index] |
| 135 | + mortality_group <- cbind(demo_info[rep(1, nrow(bg_mort))], bg_mort) |
| 136 | + mortality_data <- rbind(mortality_data, mortality_group) |
| 137 | + } |
| 138 | + |
| 139 | + setorder(mortality_data, races, sexes, agegrp) |
| 140 | + |
| 141 | + return(mortality_data) |
| 142 | +} |
0 commit comments