From fd14c39587fe84d0c4d804613fc09b098bb53db7 Mon Sep 17 00:00:00 2001 From: athowes Date: Mon, 19 Aug 2024 17:42:18 +0300 Subject: [PATCH] Complete analysis of age-district interactions --- make/4_run_aaa_fit_multi-sexbehav-sae.R | 3 + .../functions.R | 97 +++++++++++- .../orderly.yml | 15 +- .../script.R | 142 ++++++++++++++++-- 4 files changed, 228 insertions(+), 29 deletions(-) diff --git a/make/4_run_aaa_fit_multi-sexbehav-sae.R b/make/4_run_aaa_fit_multi-sexbehav-sae.R index ca9959e..b933af0 100644 --- a/make/4_run_aaa_fit_multi-sexbehav-sae.R +++ b/make/4_run_aaa_fit_multi-sexbehav-sae.R @@ -60,3 +60,6 @@ sapply(na.omit(recent_ids), function(id) orderly::orderly_commit(id)) sapply(na.omit(recent_ids), function(id) orderly::orderly_push_archive(name = report, id = id)) run_commit_push("process_multi-sexbehav-sae") + +#' PhD revisions +orderly::orderly_run("aaa_fit_multi-sexbehav-sae-age-space-interaction", parameters = list(iso3 == "MWI")) diff --git a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/functions.R b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/functions.R index d801a7b..730139b 100644 --- a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/functions.R +++ b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/functions.R @@ -17,7 +17,100 @@ multinomial_model <- function(formula, model_name, S = 1000) { message(paste0("Completed fitting ", model_name, ".")) - message("Skipping post-processing") + message("Begin post-processing") - return(list(fit = fit)) + #' Full R-INLA samples + full_samples <- inla.posterior.sample(n = S, result = fit) + + #' Just the latent field + eta_samples <- lapply(full_samples, "[", "latent") + + #' For some reason "latent" is comprised of more than only the latent field + eta_samples <- lapply(eta_samples, function(eta_sample) { + data.frame(eta_sample) %>% + tibble::rownames_to_column() %>% + rename(eta = 2) %>% + filter(substr(rowname, 1, 10) == "Predictor:") %>% + select(-rowname) + }) + + #' Into a matrix with a row for each observation and a column for each sample + eta_samples_matrix <- matrix(unlist(eta_samples), ncol = S) + eta_samples_df <- data.frame(eta_samples_matrix) + + samples <- eta_samples_df %>% + mutate( + #' To split by + obs_idx = df$obs_idx, + #' To sample predictive + #' When n_eff_kish is missing there is no survey for that observation, + #' so the posterior predictive is meaningless. Setting to zero may save + #' some computation, but probably better to filter out entirely. + n_eff_kish_new = floor(ifelse(is.na(df$n_eff_kish), 0, df$n_eff_kish)) + ) %>% + split(.$obs_idx) %>% + mclapply(function(x) { + n_eff_kish_new <- x$n_eff_kish_new + #' Remove the obs_idx and n_eff_kish_new columns + x_samples <- x[1:(length(x) - 2)] + #' Normalise each column (to avoid overflow of softmax) + x_samples <- apply(x_samples, MARGIN = 2, FUN = function(x) x - max(x)) + #' Exponentiate (can be done outside apply) + #' WARNING: That these are samples from lambda posterior isn't true! Come back to this + lambda_samples <- exp(x_samples) + #' Calculate samples from posterior of probabilites + prob_samples <- apply(lambda_samples, MARGIN = 2, FUN = function(x) x / sum(x)) + #' Calculate predictive samples (including sampling variability) + prob_predictive_samples <- apply(prob_samples, MARGIN = 2, FUN = function(x) { + stats::rmultinom(n = 1, size = n_eff_kish_new, prob = x) / n_eff_kish_new + }) + #' Return list, allowing extraction of each set of samples + list( + lambda = data.frame(lambda_samples), + prob = data.frame(prob_samples), + prob_predictive = data.frame(prob_predictive_samples)) + }) + + lambda_samples_df <- bind_rows(lapply(samples, "[[", "lambda")) + prob_samples_df <- bind_rows(lapply(samples, "[[", "prob")) + prob_predictive_samples_df <- bind_rows(lapply(samples, "[[", "prob_predictive")) + + #' Helper functions + row_summary <- function(df, ...) unname(apply(df, MARGIN = 1, ...)) + median <- function(x) quantile(x, 0.5, na.rm = TRUE) + lower <- function(x) quantile(x, 0.025, na.rm = TRUE) + upper <- function(x) quantile(x, 0.975, na.rm = TRUE) + + #' Quantile of the observation within posterior predictive + prob_predictive_quantile <- prob_predictive_samples_df %>% + mutate(estimate = df$estimate) %>% + apply(MARGIN = 1, function(x) { + estimate <- x[S + 1] + samples <- x[1:S] + if(all(is.na(samples))) return(NA) + else ecdf(samples)(estimate) + }) + + #' Calculate mean, median, lower and upper for each set of samples + df <- df %>% + mutate( + lambda_mean = row_summary(lambda_samples_df, mean), + lambda_median = row_summary(lambda_samples_df, median), + lambda_lower = row_summary(lambda_samples_df, lower), + lambda_upper = row_summary(lambda_samples_df, upper), + prob_mean = row_summary(prob_samples_df, mean), + prob_median = row_summary(prob_samples_df, median), + prob_lower = row_summary(prob_samples_df, lower), + prob_upper = row_summary(prob_samples_df, upper), + prob_predictive_mean = row_summary(prob_predictive_samples_df, mean), + prob_predictive_median = row_summary(prob_predictive_samples_df, median), + prob_predictive_lower = row_summary(prob_predictive_samples_df, lower), + prob_predictive_upper = row_summary(prob_predictive_samples_df, upper), + prob_predictive_quantile = prob_predictive_quantile, + model = model_name + ) + + message("Completed post-processing") + + return(list(df = df, fit = fit)) } diff --git a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/orderly.yml b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/orderly.yml index 319e56c..d3f993a 100644 --- a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/orderly.yml +++ b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/orderly.yml @@ -15,9 +15,11 @@ artefacts: - multi-sexbehav-sae.csv - multi-sexbehav-sae.pdf - data: - description: Stacked proportion barplots + description: Age district effect comparison plots filenames: - - stacked-proportions.pdf + - age-district-plot-nosex12m.png + - age-district-plot-sexcohab.png + - age-district-plot-sexnonregplus.png - data: description: Model selection information criteria for multinomial models filenames: @@ -26,15 +28,6 @@ artefacts: description: Random effect variance parameter posterior means filenames: - variance-proportions.csv - - data: - description: Sample size recovery diagnostic - filenames: - - sample-size-recovery.pdf - - data: - description: Coverage posterior predictive checks - filenames: - - coverage-histograms.pdf - - coverage-ecdf-diff.pdf parameters: iso3: diff --git a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/script.R b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/script.R index 2cfbac5..b702678 100644 --- a/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/script.R +++ b/src/aaa_fit_multi-sexbehav-sae-age-space-interaction/script.R @@ -191,34 +191,27 @@ formula4 <- update(formula_baseline, #' Model 4 extended (Chris thesis revision suggestion) #' * age x space x category random effects (IID) -# formula4_extended <- update(formula4, -# . ~ . + f(area_idx, model = "besag", graph = adjM, scale.model = TRUE, group = cat_idx, -# control.group = list(model = "iid"), constr = TRUE, hyper = multi.utils::tau_pc(x = 0.001, u = 2.5, alpha = 0.01)) + -# ) +formula4_extended <- update(formula4, + . ~ . + f(area_idx_copy, model = "iid", group = age_idx, replicate = cat_idx, + constr = TRUE, hyper = multi.utils::tau_pc(x = 0.001, u = 2.5, alpha = 0.01)) +) #' Fit the models #' Number of Monte Carlo samples S <- 1000 -formulas <- list(formula4) -models <- list("Model 4") - -#' tryCatch version for safety -try_multinomial_model <- function(...) { - return(tryCatch(multinomial_model(...), error = function(e) { - message("Error!") - return(NULL) - })) -} +formulas <- list(formula4, formula4_extended) +models <- list("Model 4", "Model 4 extended") res <- purrr::pmap( list(formula = formulas, model_name = models, S = S), - try_multinomial_model + multinomial_model ) #' Extract the fitted models -res_fit <- lapply(res, "[[", 1) +res_df <- lapply(res, "[[", 1) %>% bind_rows() +res_fit <- lapply(res, "[[", 2) #' Add columns for local DIC, WAIC, CPO ic <- lapply(res_fit, function(fit) { @@ -277,3 +270,120 @@ variance_df <- map(res_fit, function(fit) ) write_csv(variance_df, "variance-proportions.csv", na = "") + +#' Artefact: Smoothed district indicator estimates for multinomial models +res_df <- res_df %>% + #' Make it clear which of the estimates are raw and which are from the model (smoothed) + rename( + estimate_raw = estimate, + ci_lower_raw = ci_lower, + ci_upper_raw = ci_upper, + estimate_smoothed = prob_mean, + median_smoothed = prob_median, + ci_lower_smoothed = prob_lower, + ci_upper_smoothed = prob_upper + ) %>% + mutate(iso3 = iso3, .before = indicator) %>% + relocate(model, .before = estimate_smoothed) + +write_csv(res_df, "multi-sexbehav-sae.csv", na = "") + +#' Create plotting data +res_plot <- res_df %>% + filter(area_id != iso3) %>% + pivot_longer( + cols = c(starts_with("estimate")), + names_to = c(".value", "source"), + names_pattern = "(.*)\\_(.*)" + ) %>% + left_join( #' Use this to make it an sf again + select(areas, area_id), + by = "area_id" + ) %>% + st_as_sf() + +#' Artefact: Cloropleths +pdf("multi-sexbehav-sae.pdf", h = 8.25, w = 11.75) + +res_plot %>% + split(~indicator + model) %>% + lapply(function(x) + x %>% + mutate( + age_group = fct_recode(age_group, + "15-19" = "Y015_019", + "20-24" = "Y020_024", + "25-29" = "Y025_029" + ), + source = fct_relevel(source, "raw", "smoothed") %>% + fct_recode("Survey raw" = "raw", "Smoothed" = "smoothed") + ) %>% + ggplot(aes(fill = estimate)) + + geom_sf(size = 0.1, colour = scales::alpha("grey", 0.25)) + + scale_fill_viridis_c(option = "C", label = label_percent()) + + facet_grid(age_group ~ survey_id + source) + + theme_minimal() + + labs( + title = paste0(substr(x$survey_id[1], 1, 3), ": ", x$indicator[1], " (", x$model[1], ")") + ) + + theme( + axis.text = element_blank(), + axis.ticks = element_blank(), + panel.grid = element_blank(), + strip.text = element_text(face = "bold"), + plot.title = element_text(face = "bold"), + legend.position = "bottom", + legend.key.width = unit(4, "lines") + ) + ) + +dev.off() + +#' Artefact: Special revisions cloropleths +age_district_plot <- res_plot %>% + mutate(year = as.numeric(substr(survey_id, 4, 7))) %>% + filter(year == max(year)) %>% + mutate( + age_group = fct_recode(age_group, + "15-19" = "Y015_019", + "20-24" = "Y020_024", + "25-29" = "Y025_029" + ), + indicator_plot = fct_recode(indicator, + "Not sexually active" = "nosex12m", + "One cohabiting partner" = "sexcohab", + "Non-regular or multiple partners(s) +" = "sexnonregplus" + ), + source = fct_relevel(source, "raw", "smoothed") %>% + fct_recode("Survey raw" = "raw", "Smoothed" = "smoothed"), + source_extended = case_when( + source == "Survey raw" ~ "Survey raw", + source == "Smoothed" & model == "Model 4" ~ "Base", + source == "Smoothed" & model == "Model 4 extended" ~ "Extended" + ), + source_extended = fct_relevel(source_extended, "Survey raw", "Base", "Extended") + ) %>% + split(~ indicator) %>% + lapply(function(x) + x %>% + ggplot(aes(fill = estimate)) + + geom_sf(size = 0.1, colour = scales::alpha("grey", 0.25)) + + scale_fill_viridis_c(option = "C", label = label_percent()) + + facet_grid(age_group ~ source_extended) + + labs( + title = , + caption = paste0( + "Indicator: ", x$indicator_plot[1], "; Survey: ", x$survey_id[1], "\n", + "Extended includes age-district interaction effects" + ), + fill = "Estimate" + ) + + theme_minimal() + + theme( + axis.text = element_blank(), + axis.ticks = element_blank(), + panel.grid = element_blank(), + ) + ) + +imap(age_district_plot, \(x, name) ggsave(paste0("age-district-plot-", tolower(name), ".png"), x, h = 7.5, w = 6.25))