diff --git a/DESCRIPTION b/DESCRIPTION index 9cd5179..4477460 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: jenner Title: Internal Montagu Helpers -Version: 0.0.15 +Version: 0.0.16 Description: Helpers for Montagu. License: MIT + file LICENSE Author: Rich FitzJohn diff --git a/NEWS.md b/NEWS.md index 4f04375..2523c55 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +### jenner 0.0.16 + * VIMC-1738 modup method 1, method 2 verison 2 (gavi impact filtered from total impact by per-year-gavi-level) ### jenner 0.0.15 * VIMC-14628 impact_method2 sql modification post Wes' touchstone fast forward diff --git a/R/modified_update.R b/R/modified_update.R index 86a2579..b2b88b2 100644 --- a/R/modified_update.R +++ b/R/modified_update.R @@ -9,26 +9,38 @@ ##' @param touchstone_use Name of the touchstone that we are basing ##' this off of ##' +##' @param method two options: 1 = method1, 2 = method2 +##' method2_v2 is different from method2 in that, it takes per-year-gavi-level as an imput for gavi impact +##' ##' @export -modified_update_calculate <- function(con, touchstone_name_mod, touchstone_use) { +modified_update_calculate <- function(con, touchstone_name_mod, touchstone_use, method = 2) { + if (!(method %in% 1:2)) { + stop("currently, only method 1 and 2 have been developed.") + } year_min <- 2001 year_max <- 2030 - + sql <- paste("SELECT *", " FROM touchstone", " WHERE touchstone_name = $1", sep = "\n") touchstone_mod <- DBI::dbGetQuery(con, sql, touchstone_name_mod) - i <- touchstone_mod$id != '201510gavi-42' - touchstone_mod <- touchstone_mod[which.max(touchstone_mod$version[i]), ] + ### touchstone 201510gavi-42 is imported from Rshiny, it contains Rubella only + ### It is not a target touchstone we are interested in. + i <- touchstone_mod$id != '201510gavi-42' + touchstone_mod <- touchstone_mod[which.max(touchstone_mod$version[i]), ] modup_by_itself <- any(touchstone_name_mod == touchstone_use) meta <- mu_prepare(con, touchstone_mod$id, modup_by_itself = modup_by_itself) + ## 201403gavi - SDF9 - does not have MCV1 coverage, we need to get rid of it to avoid triggering error message + if (touchstone_name_mod == "201403gavi") { + meta <- meta[meta$vaccine != "MCV1", ] + } meta <- mu_impact_metadata(meta, touchstone_use) - + meta$touchstone_mod <- as.list(touchstone_mod[c("id", "touchstone_name", "version")]) meta$touchstone_use <- touchstone_use - + meta$group$touchstone_src <- meta$group$touchstone touchstone_src_native <- sub("-[0-9]{6}.*", "", meta$group$touchstone_name) meta$group$touchstone_name_dest <- @@ -37,22 +49,24 @@ modified_update_calculate <- function(con, touchstone_name_mod, touchstone_use) "[%s] %s", ifelse(meta$group$activity_type == "routine", "Rout", "SIA"), meta$group$vaccine) - + ## This is quite slow to run but we can just pull it once: fmt <- "SELECT * FROM v_target_pop_%s WHERE YEAR BETWEEN %d AND %d" pop <- list( total = DBI::dbGetQuery(con, sprintf(fmt, "total", year_min, year_max)), routine = DBI::dbGetQuery(con, sprintf(fmt, "routine", year_min, year_max))) - - + + index <- meta$group$index data <- lapply(index, function(i) mu_build_data(con, i, meta, pop)) - + data <- rbind_simple(data) ## Perform the update itself the update: - data$deaths_averted_new <- mu_scale("deaths_averted", data) - data$cases_averted_new <- mu_scale("cases_averted", data) + data$deaths_averted_new <- mu_scale("deaths_averted", data, method) + data$cases_averted_new <- mu_scale("cases_averted", data, method) + meta$data <- data + meta } @@ -79,27 +93,27 @@ modified_update_summary_output <- function(con, res, path_meta) { x[j] <- tr[[to]][i[j]] x } - + ## TODO: At the moment the metadata handling is very crude; the ## inputs are not properly documented or anything. meta_files <- c("gavi_country_data.csv", "tr_touchstone.csv", "years_output.csv") stopifnot(all(file.exists(file.path(path_meta, meta_files)))) - - + + ## Start on the meta group <- res$group - + ## TODO: we need to filter this to only include the "gavi focal model" ## for each disease. This will do for now: focal <- DBI::dbReadTable(con, "gavi_focal_model") focal$.code <- paste(focal$touchstone, focal$disease, focal$model, sep = "\n") group$is_focal <- paste(group$touchstone, group$disease, group$model, sep = "\n") %in% - focal$.code - + focal$.code + tr_touchstone <- read_csv("meta/tr_touchstone.csv") - + group_out <- data_frame( vaccine = group$vaccine, ## New columns @@ -111,10 +125,10 @@ modified_update_summary_output <- function(con, res, path_meta) { model = group$model, touchstone = group$touchstone_name, vaccine_delivery = group$vaccine_delivery) - + group_out$touchstone_short <- translate_vals(group$touchstone_name, tr_touchstone, "montagu_short") - + ## The metadata that we have on countries differs from what the shiny ## app has (e.g WHO region of EMR vs EMRO for AFG), so we'll use this: ## cmp <- read.csv("../gavi-shiny-app/data_20170117.csv", @@ -125,7 +139,7 @@ modified_update_summary_output <- function(con, res, path_meta) { ## write.csv(country, "meta/shiny/gavi_country_data.csv", row.names = FALSE) country <- read_csv(file.path(path_meta, "gavi_country_data.csv")) country$global <- NULL - + ## Which leaves us with the rest: ## ## TODO: this should be target_pop_estimated_new which needs to come @@ -146,17 +160,17 @@ modified_update_summary_output <- function(con, res, path_meta) { cases_averted = "cases_averted_new", fvps = "fvps_new", population = "target_pop_given_new") - + dat <- res$data[cols] names(dat) <- names(cols) tmp <- country[match(dat$country, country$country), setdiff(names(country), "country")] dat <- cbind(dat, tmp) rownames(dat) <- NULL - + dat_updated <- dat dat_updated[names(cols_update)] <- res$data[cols_update] - + group_out_updated <- group_out ## Makeup the suffix from res$touchstone_mod$touchstone_name name_suffix <- unlist(strsplit(res$touchstone_mod$touchstone_name, "-")) @@ -165,26 +179,26 @@ modified_update_summary_output <- function(con, res, path_meta) { paste0(sub("-[0-9]{6}.*", "", group_out$touchstone),"-", name_suffix ) stopifnot(all(group_out_updated$touchstone %in% - tr_touchstone$montagu)) + tr_touchstone$montagu)) group_out_updated$touchstone_short <- translate_vals(group_out_updated$touchstone, tr_touchstone, from = "montagu", to = "montagu_short") - + i <- match(res$data$index, res$group$index) d1 <- cbind(group_out[i, ], dat) d2 <- cbind(group_out_updated[i, ], dat_updated) d3 <- rbind(d1, d2) rownames(d3) <- NULL - + ## Filter output to a set of years per touchstone: keep <- read_csv("meta/years_output.csv") i <- logical(nrow(d3)) for (j in seq_len(nrow(keep))) { i[d3$touchstone == keep$touchstone[j] & - d3$year >= keep$year_from[j] & - d3$year <= keep$year_to[j]] <- TRUE + d3$year >= keep$year_from[j] & + d3$year <= keep$year_to[j]] <- TRUE } ## Exclude MCV1 i[d3$vaccine == "MCV1"] <- FALSE @@ -198,7 +212,7 @@ mu_prepare <- function(con, touchstone_new, modup_by_itself = FALSE) { ## this duplication later. ## sometimes we do modup for touchstone_old = touchstone_new ## the if-else clause is important to deal with this - if(modup_by_itself) { + if (modup_by_itself) { touchstone_new2 <- NULL } else { touchstone_new2 <- touchstone_new @@ -213,13 +227,13 @@ mu_prepare <- function(con, touchstone_new, modup_by_itself = FALSE) { whisker::whisker.render(txt, list(touchstone_new2 = touchstone_new2, touchstone_new = touchstone_new)) } - + views <- c("migrate_coverage", "burden_fvps", "impact", "coverage", "target_pop_total", "target_pop_routine") for (v in views) { DBI::dbExecute(con, temporary_view(v, read_sql(v))) } - + DBI::dbGetQuery(con, read_sql("query_metadata")) } @@ -229,14 +243,14 @@ mu_build_data <- function(con, index, meta, pop) { year_min <- min(pop$total$year) year_max <- max(pop$total$year) w <- 4L # 1/2 window for rolling average - + ## Process our arguments a bit more: x <- meta$group[meta$group$index == index, ]; print(x) xd <- meta$impacts[meta$impacts$index == index, ] x_deaths <- xd[xd$impact_outcome == "deaths_averted", ] x_cases <- xd[xd$impact_outcome == "cases_averted", ] x_fvps <- xd[xd$impact_outcome == "fvps_added", ] - + sql <- paste("SELECT country", " FROM touchstone_country", " WHERE touchstone = $1", @@ -244,22 +258,22 @@ mu_build_data <- function(con, index, meta, pop) { " ORDER BY country", sep = "\n") countries <- DBI::dbGetQuery(con, sql, list(x$touchstone, x$disease))[[1]] - + if (length(countries) == 0L) { stop("Import error") } - + window <- 2 * w + 1 year_max2 <- year_max + w years <- year_min:year_max2 n_years <- length(years) - + dat <- expand.grid(year = years, country = countries, index = index, stringsAsFactors = FALSE)[3:1] dat$.code <- paste(dat$country, dat$year, sep = "\r") - + merge_in <- function(dat, d, cols) { d$.code <- paste(d$country, d$year, sep = "\r") i <- match(dat$.code, d$.code) @@ -272,16 +286,16 @@ mu_build_data <- function(con, index, meta, pop) { } cbind(dat, keep) } - + ## 1. impact: sql <- paste("SELECT * FROM v_impact", " WHERE YEAR BETWEEN 2001 AND $2", " AND impact_estimate_set = $1") - + d_deaths_averted <- DBI::dbGetQuery(con, sql, list(x_deaths$impact_estimate_set, year_max2)) dat <- merge_in(dat, d_deaths_averted, c(deaths_averted = "value")) - + if (nrow(x_cases) == 0) { dat$cases_averted <- NA_real_ } else { @@ -289,7 +303,7 @@ mu_build_data <- function(con, index, meta, pop) { DBI::dbGetQuery(con, sql, list(x_cases$impact_estimate_set, year_max2)) dat <- merge_in(dat, d_cases_averted, c(cases_averted = "value")) } - + if (nrow(x_fvps) > 0) { d_fvps_added <- DBI::dbGetQuery(con, sql, list(x_fvps$impact_estimate_set, year_max2)) @@ -302,7 +316,7 @@ mu_build_data <- function(con, index, meta, pop) { d_fvps <- DBI::dbGetQuery(con, sql, list(x$burden_estimate_set, year_max2)) dat <- merge_in(dat, d_fvps, c(fvps = "value")) } - + ## 3. old coverage sql <- paste("SELECT * FROM v_coverage", " WHERE YEAR BETWEEN 2001 and $2", @@ -311,23 +325,23 @@ mu_build_data <- function(con, index, meta, pop) { dat <- merge_in(dat, mu_fix_coverage(d_cov_old), c(coverage_old = "coverage", coverage_target = "target")) - + ## 4. new coverage if (is.na(x$coverage_set_new)) { stop("Import error: no new coverage found") } else { - if(meta$touchstone_mod$touchstone_name == meta$group$touchstone_name[meta$group$index == index]) { + if (meta$touchstone_mod$touchstone_name == meta$group$touchstone_name[meta$group$index == index]) { d_cov_new <- d_cov_old } else { d_cov_new <- DBI::dbGetQuery(con, sql, list(x$coverage_set_new, year_max2)) } - dat <- merge_in(dat, mu_fix_coverage(d_cov_new), - c(coverage_new = "coverage", - coverage_target_new = "target")) } - - - + dat <- merge_in(dat, mu_fix_coverage(d_cov_new), + c(coverage_new = "coverage", + coverage_target_new = "target")) + + + ## 5. population estimates pop_total <- pop$total[pop$total$demographic_source == x$demographic_source, ] pop_routine <- pop$routine[ @@ -335,9 +349,9 @@ mu_build_data <- function(con, index, meta, pop) { pop$routine$demographic_source == x$demographic_source, ] dat <- merge_in(dat, pop_total, c(pop_total = "value")) dat <- merge_in(dat, pop_routine, c(pop_routine = "value")) - + ## 6. Impact rates - + ## NOTE: There is some fiddling required here. We might have cases ## where there is nonzero impact but zero fvps. There are two forms ## of this! @@ -354,8 +368,8 @@ mu_build_data <- function(con, index, meta, pop) { ## the coverage_target is not known; those cases I've opted to just ## zero the rate calculation but leave the impact in. ## (we have such cases for routines as well.) - - + + i <- is_blank(dat$fvps) & !(is_blank(dat$deaths_averted) & is_blank(dat$cases_averted)) & !is_blank(dat$coverage_target) @@ -365,23 +379,23 @@ mu_build_data <- function(con, index, meta, pop) { ## Otherwise use pop_routine. dat$fvps[i] <- dat$coverage_old[i] * dat$coverage_target[i] } - + ## There are cases where we have blank fvps_old, but positive coverage_old ## This caused the Inf/-Inf impact rate issue for CHN and TCD ## We create synthetic fvps for such cases when applicable. i <- is_blank(dat$fvps) & !is_blank(dat$coverage_old) if (any(i)) { message("Creating synthetic fvps for ", x$model) - if((x$activity_type == "campaign")){ + if ((x$activity_type == "campaign")){ dat$fvps[i] <- dat$coverage_old[i] * dat$coverage_target[i] - }else{ + } else { dat$fvps[i] <- dat$coverage_old[i] * dat$pop_routine[i] } } - + dat <- mu_calculate_rate("deaths", dat, window, n_years) dat <- mu_calculate_rate("cases", dat, window, n_years) - + ## And another quantity needed ## ## NOTE: Here, we can run into trouble where the coverage is zero @@ -390,10 +404,10 @@ mu_build_data <- function(con, index, meta, pop) { ## rolling calculation) dat$target_pop_estimated <- dat$fvps / dat$coverage_old dat$target_pop_estimated[dat$fvps > 0 & is_blank(dat$coverage_old)] <- NA - + dat$target_pop_estimated_avg <- roll_mean_by(dat$target_pop_estimated, window, n_years) - + if (x$activity_type == "routine") { dat$target_pop_given <- dat$pop_routine dat$target_pop_given_new <- dat$pop_routine @@ -401,12 +415,12 @@ mu_build_data <- function(con, index, meta, pop) { dat$target_pop_given <- dat$coverage_target dat$target_pop_given_new <- dat$coverage_target_new } - + ## We also want new fvps which we compute as coverage * target pop v <- if (x$activity_type == "campaign") "coverage_target_new" else "pop_routine" dat$fvps_new <- dat$coverage_new * dat[[v]] - + ## At the moment we can do nothing for MHL, TUV and XK, because we have no routine pop for them i <- !is_blank(dat$coverage_new) & is_blank(dat$fvps_new) & dat$year < 2031#& !is_blank(dat$target_pop_estimated) if (any(i)) { @@ -417,23 +431,23 @@ mu_build_data <- function(con, index, meta, pop) { stop("modified update error") } #For analysis purpose, a touchstone can be 'updated' by itself - assumes equal fvps - if(meta$touchstone_mod$touchstone_name == meta$group$touchstone_name[meta$group$index == index]) + if (meta$touchstone_mod$touchstone_name == meta$group$touchstone_name[meta$group$index == index]) dat$fvps_new <- dat$fvps ## drop excess temporary things dat$.code <- NULL dat <- dat[dat$year <= year_max, ] - + dat } mu_impact_metadata <- function(meta, touchstone_use) { meta <- meta[meta$touchstone_name %in% touchstone_use, ] rownames(meta) <- NULL - + vary <- c("impact_outcome", "impact_estimate_set", "impact_estimate_recipe", "focal_ingredient") same <- setdiff(names(meta), vary) - + f <- function(y) { n <- vapply(y, function(x) length(unique(x)), integer(1)) stopifnot(all(n[same] == 1)) @@ -443,16 +457,16 @@ mu_impact_metadata <- function(meta, touchstone_use) { meta$vaccine, meta$activity_type, meta$support_type, meta$responsibility, sep = "\r") meta_code <- factor(meta_code, unique(meta_code)) - + i1 <- unname(tapply(seq_along(meta_code), meta_code, min)) group <- meta[i1, same] group$index <- seq_len(nrow(group)) rownames(group) <- NULL - + impacts <- meta[vary] impacts$index <- group$index[tapply(meta_code, meta_code)] impacts <- impacts[order(impacts$index, impacts$impact_outcome), ] - + list(group = group, impacts = impacts) } @@ -463,13 +477,26 @@ mu_impact_metadata <- function(meta, touchstone_use) { ##' ##' @param d Data: use impact_rate_tot (method 2) ##' +##' @param method two options: method1 and method2 ##' @export -mu_scale <- function(name, d) { - ## This chunck becomes simpler, since only method 2 is used throughout. - rate_tot_old <- d[[paste0(name, "_rate_tot")]] - fvps_new <- d$fvps_new - - ret <- fvps_new * rate_tot_old +mu_scale <- function(name, d, method = 2) { + ## AS we need method 1 now. mu_scale includes both method 1 and method 2 now + ## This is method 1. + if (method == 1) { + # by default use impact_new = impact_old / coverage_old * coverage_new + ret <- d[[name]] / d$coverage_old * d$coverage_new + + # when coverage_old is NA or 0, use impact_rate_tot*fvps_new + i <- is.na(d[[name]]) | d[[name]] == 0 + rate_tot_old <- d[[paste0(name, "_rate_tot")]] + fvps_new <- d$fvps_new + ret[i] <- fvps_new[i] * rate_tot_old[i] + } else { + ## This is method 2 + rate_tot_old <- d[[paste0(name, "_rate_tot")]] + fvps_new <- d$fvps_new + ret <- fvps_new * rate_tot_old + } ret[!is.finite(ret)] <- NA j <- !is.na(d$coverage_new) & d$coverage_new == 0 ret[j] <- 0 @@ -494,19 +521,19 @@ mu_fix_coverage <- function(d) { } v } - + code <- make_code(args = d[c("coverage_set", "country", "year")]) n <- table(code) fix <- code %in% as.integer(names(n[n > 1])) - + d_fix <- d[fix, ] i <- code[fix] - + tmp <- d_fix[tapply(seq_along(i), i, utils::head, 1), ] tmp$target <- unname(tapply(d_fix$target, i, sum, na.rm = TRUE)) reached <- tapply(d_fix$coverage * d_fix$target, i, sum, na.rm = TRUE) tmp$coverage <- unname(reached / tmp$target) - + ret <- rbind(d[!fix, ], tmp) ret[order(ret$coverage_set, ret$country, ret$year), ] } @@ -518,19 +545,19 @@ mu_calculate_rate <- function(name, dat, window, n_years) { v_tot <- sprintf("%s_averted_rate_tot", name) v_use <- sprintf("%s_averted_rate", name) v_type <- sprintf("%s_averted_rate_type", name) - + dat[[v_inst]] <- dat[[v_averted]] / dat$fvps dat[[v_inst]][ is_blank(dat$fvps) & !is_blank(dat[[v_averted]])] <- NA - + dat[[v_avg]] <- roll_sum_by(dat[[v_averted]], window, n_years) / roll_sum_by(dat$fvps, window, n_years) - + dat[[v_tot]] <- roll_sum_by(dat[[v_averted]], Inf, n_years) / roll_sum_by(dat$fvps, Inf, n_years) - + ## Some cases don't have any non-NA rates type <- rep(NA, nrow(dat)) use <- rep(NA_real_, nrow(dat)) @@ -538,10 +565,10 @@ mu_calculate_rate <- function(name, dat, window, n_years) { i <- !is.na(dat[[v_tot]]) type[i] <- "tot" use[i] <- dat[[v_tot]][i] - + dat[[v_use]] <- use dat[[v_type]] <- type - + dat } @@ -562,15 +589,15 @@ mu_year_introduction <- function(con, dat, dat_summary){ meta.group <- unique(group[meta.group]) x <- merge(meta.group, dat_summary, by = c("touchstone", "modelling_group", "vaccine"), all.y=TRUE) stopifnot( nrow(x) == nrow(dat_summary) ) - + x$touchstone_src[ is.na(x$touchstone_src) ] <- dat$touchstone_mod$id - + coverage.touchstone <- as.list(c(unique(x$touchstone_src))) - + y <- do.call(rbind, lapply(coverage.touchstone, function(i) find_year_introduction(con, i))) - + z <- merge(x, y, by=c("touchstone_src", "activity_type", "vaccine", "country"), all.x = TRUE) - + z } diff --git a/inst/sql/modified_update/migrate_coverage.sql b/inst/sql/modified_update/migrate_coverage.sql index 09d4088..97c3bf8 100644 --- a/inst/sql/modified_update/migrate_coverage.sql +++ b/inst/sql/modified_update/migrate_coverage.sql @@ -9,6 +9,9 @@ LEFT JOIN (SELECT coverage_set.* FROM coverage_set JOIN touchstone ON touchstone.id = coverage_set.touchstone WHERE touchstone = '{{{touchstone_new}}}' AND (coverage_set.gavi_support_level = 'with' - OR coverage_set.gavi_support_level = 'bestminus') )AS coverage_new + OR coverage_set.gavi_support_level = 'bestminus') + AND name NOT IN ('Rota: MCV1, with, routine', + 'Hib: MCV1, with, routine', + 'PCV: MCV1, with, routine'))AS coverage_new ON coverage_old.vaccine = coverage_new.vaccine AND coverage_old.activity_type = coverage_new.activity_type diff --git a/man/modified_update_calculate.Rd b/man/modified_update_calculate.Rd index fea71ea..4b131c8 100644 --- a/man/modified_update_calculate.Rd +++ b/man/modified_update_calculate.Rd @@ -4,7 +4,8 @@ \alias{modified_update_calculate} \title{Do a modified update} \usage{ -modified_update_calculate(con, touchstone_name_mod, touchstone_use) +modified_update_calculate(con, touchstone_name_mod, touchstone_use, + method = 2) } \arguments{ \item{con}{Database connection} @@ -14,6 +15,9 @@ that we are creating} \item{touchstone_use}{Name of the touchstone that we are basing this off of} + +\item{method}{two options: 1 = method1, 2 = method2 +method2_v2 is different from method2 in that, it takes per-year-gavi-level as an imput for gavi impact} } \description{ Do a modified update diff --git a/man/mu_scale.Rd b/man/mu_scale.Rd index e8a3c5a..9b850d4 100644 --- a/man/mu_scale.Rd +++ b/man/mu_scale.Rd @@ -4,12 +4,14 @@ \alias{mu_scale} \title{Calculate updated impact} \usage{ -mu_scale(name, d) +mu_scale(name, d, method = 2) } \arguments{ \item{name}{Impact type: deaths_averted or cases_averted} \item{d}{Data: use impact_rate_tot (method 2)} + +\item{method}{two options: method1 and method2} } \description{ Calculate updated impact