From 38e4a0a3a85b75710cf65c36368b047bf4f2e8cd Mon Sep 17 00:00:00 2001 From: holdenharris-NOAA Date: Tue, 28 Nov 2023 16:43:26 -0500 Subject: [PATCH] Code added to create individual files for each scaling period. Made loop along different scaling periods. --- Plots-to-compare-multiple-ecospace-outputs.R | 398 +++++++++++++------ 1 file changed, 271 insertions(+), 127 deletions(-) diff --git a/Plots-to-compare-multiple-ecospace-outputs.R b/Plots-to-compare-multiple-ecospace-outputs.R index 6b2130b..18dc75e 100644 --- a/Plots-to-compare-multiple-ecospace-outputs.R +++ b/Plots-to-compare-multiple-ecospace-outputs.R @@ -6,12 +6,11 @@ ## 'C' --> Denotes catch ## Setup ----------------------------------------------------------------------- -rm(list=ls()); graphics.off(); gc() +rm(list=ls()) source("./functions.R") ## Pull in functions library(dplyr) ## Input set up ---------------------------------------------------------------- -dir_pdf_out = "./PDF_plots/" ewe_name = "EwE_Outputs" sim_scenario = "sim-spa_01" obs_TS_name = "TS_updated_IB13" @@ -20,143 +19,291 @@ spa_scenarios = c("spa_00", "spa_01", "spa_02_MOM6-ISIMIP3a", "spa_03_MOM6-ISIM spa_scen_names = c("01 Base", "02 MODIS-ChlA", "03 MOM6-ChlA", "04 MOM6-Vint") ## User-defined output parameters ---------------------------------------------- +num_plot_pages = 4 ## Sets number of pages for PDF file today_date <- format(Sys.Date(), "%Y-%m-%d") out_file_notes = "multispa" -plot_name_xY = paste0("B_scaled_xY_", out_file_notes) -#plot_name_xM = paste0("B_scaled_xM_", out_file_notes) -#plot_name_C = paste0("C_", spa_scenario, out_file_notes) -num_plot_pages = 4 ## Sets number of pages for PDF file - -## Set scaling parameters -#init_years_toscale = 1 -init_years_toscale = 2016-1980 ## 36. This will scale all outputs to the global average -init_months_toscale = init_years_toscale * 12 - -## ----------------------------------------------------------------------------- -## -## Read-in ANNUAL Observed, Ecosim, and Ecospace TS -dir_sim = paste0("./", ewe_name, "/ecosim_", sim_scenario, "/") - -## Read-in Ecosim annual biomass -filename = paste0(dir_sim, "biomass_annual.csv") -num_skip_sim = f.find_start_line(filename, flag = srt_year) - -simB_xY <- read.csv(paste0(dir_sim, "biomass_annual.csv"), skip = num_skip_sim) -years = simB_xY$year.group ## Get date range from Ecosim -simB_xY$year.group = NULL - -## Read-in Ecosim annual catches -simC_xY <- read.csv(paste0(dir_sim, "catch_annual.csv"), skip = num_skip_sim) -simC_xY$year.group = NULL - -## Read-in Ecospace annual biomass and catches --------------------------------- -ls_spaB_xY <- list() -ls_spaC_xY <- list() -for (i in 1:length(spa_scenarios)) { - dir_spa = paste0("./", ewe_name, "/", spa_scenarios[i], "/") - filename <- paste0(dir_spa, "Ecospace_Annual_Average_Biomass.csv") - num_skip_spa <- f.find_start_line(filename, flag = "Year") - spaB_xY <- read.csv(paste0(dir_spa, "Ecospace_Annual_Average_Biomass.csv"), - skip = num_skip_spa, header = TRUE) - spaB_xY$Year = NULL - - ## Standardize FG names ------------------------------ - fg_names = f.standardize_group_names(colnames(spaB_xY)) - num_fg = length(fg_names) - fg_df <- data.frame( - pool_code = 1:num_fg, - group_name = paste(sprintf("%02d", 1:num_fg), - gsub("_", " ", fg_names)) - ) - - ## Set row and column names - rownames(spaB_xY) = rownames(simB_xY) = years - colnames(spaB_xY) = colnames(simB_xY) = fg_df$group_name - - ## Add current spaB_xY reading into the list object ---- - ls_spaB_xY[[i]] <- spaB_xY - - ## Read-in Ecospace annual catch - spaC_xY <- read.csv(paste0(dir_spa, "Ecospace_Annual_Average_Catch.csv"), - skip = num_skip_spa, header = TRUE) - spaC_xY$Year = NULL - ls_spaC_xY[[i]] <- spaC_xY -} - -## ----------------------------------------------------------------------------- -## Prepare months and date series objects -start_y <- min(years) -end_y <- max(years) -date_series <- seq(as.Date(paste0(start_y, "-01-01")), as.Date(paste0(end_y, "-12-01")), by = "1 month") -year_series <- seq(as.Date(paste0(start_y, "-01-01")), as.Date(paste0(end_y, "-12-01")), by = "1 year") -ym_series <- format(date_series, "%Y-%m") - -## Read in MONTHLY biomasses -#simB_xM <- read.csv(paste0(dir_sim, "biomass_monthly.csv"), skip = num_skip_sim); simB_xM$timestep.group = NULL -#simC_xM <- read.csv(paste0(dir_sim, "catch_monthly.csv"), skip = num_skip_sim); simC_xM$timestep.group = NULL -#spaB_xM <- read.csv(paste0(dir_spa, "Ecospace_Average_Biomass.csv"), skip = num_skip_spa, header = TRUE); spaB_xM$TimeStep = NULL -#rownames(spaB_xM) = rownames(simB_xM) = ym_series - - -## Read in "observed" timeseries ----------------------------------------------- -dir_obs = paste0("./", ewe_name, "/", obs_TS_name, ".csv") -obs.list = f.read_ecosim_timeseries(dir_obs, num_row_header = 4) -for(i in 1:length(obs.list)){assign(names(obs.list)[i],obs.list[[i]])} #make separate dataframe for each list element - -obsB.head <- merge(obsB.head, fg_df, by = "pool_code", all.x = TRUE) -obsC.head <- merge(obsC.head, fg_df, by = "pool_code", all.x = TRUE) - -colnames(obsB) = obsB.head$group_name -colnames(obsC) = obsC.head$group_name - -## ----------------------------------------------------------------------------- -## -## Plot biomasses -## Note: Make sure PDF readers are closed before running pdf() +scaling_list = c(1, 5, 10, 36) -## ----------------------------------------------------------------------------- -## Plot and compare ANNUAL biomass +for (init in scaling_list){ -## Plotting parameters -col_obs = 'black' -col_sim = rgb(0.2, 0.7, .1, alpha = 0.6) ## rgb (red, green, blue, alpha) -#col_spa <- colorRampPalette(c("deepskyblue", "blue1", "blue4", "purple4", "darkviolet"))(length(spaB_scaled_ls)) -#col_spa <- rainbow(length(spaB_scaled_ls)) -col_spa <- c("darkgoldenrod", "indianred2", "steelblue4", "darkorchid4") -#col_spa <- adjustcolor(col_spa, alpha.f = 1) ## Adjust transparance + ## Set scaling parameters + init_years_toscale = init + (plot_name_xY = paste0("BxY_scaled_", init_years_toscale, "y-", out_file_notes, ".PDF")) -x = year_series -num_plot_pages = 4; x_break = 5; y_break = 4; x_cex = 0.9; y_cex = 0.9; x_las = 2; -sim_lty = 1; spa_lty = 1 -sim_lwd = 2; spa_lwd = 1; obs_pch = 16; obs_cex = 0.8; -main_cex = 0.85; leg_cex = 0.9; leg_pos = 'topleft';leg_inset = 0.1 -#simB_scaled = spaB_scaled_ls - -(dir_pdf_out_xY <- paste0(dir_pdf_out, plot_name_xY, ".PDF")) -pdf(dir_pdf_out_xY, onefile = TRUE) + ## Plot output names + dir_pdf_out = "./Scenario_comps/PDF_plots/" + pdf_file_name= paste0(dir_pdf_out, plot_name_xY) + + ## ----------------------------------------------------------------------------- + ## + ## Read-in ANNUAL Observed, Ecosim, and Ecospace TS + dir_sim = paste0("./", ewe_name, "/ecosim_", sim_scenario, "/") + + ## Read-in Ecosim annual biomass + filename = paste0(dir_sim, "biomass_annual.csv") + num_skip_sim = f.find_start_line(filename, flag = srt_year) + + simB_xY <- read.csv(paste0(dir_sim, "biomass_annual.csv"), skip = num_skip_sim) + years = simB_xY$year.group ## Get date range from Ecosim + simB_xY$year.group = NULL + + ## Read-in Ecosim annual catches + simC_xY <- read.csv(paste0(dir_sim, "catch_annual.csv"), skip = num_skip_sim) + simC_xY$year.group = NULL + + ## Read-in Ecospace annual biomass and catches --------------------------------- + ls_spaB_xY <- list() + ls_spaC_xY <- list() + for (i in 1:length(spa_scenarios)) { + dir_spa = paste0("./", ewe_name, "/", spa_scenarios[i], "/") + filename <- paste0(dir_spa, "Ecospace_Annual_Average_Biomass.csv") + num_skip_spa <- f.find_start_line(filename, flag = "Year") + spaB_xY <- read.csv(paste0(dir_spa, "Ecospace_Annual_Average_Biomass.csv"), + skip = num_skip_spa, header = TRUE) + spaB_xY$Year = NULL + + ## Standardize FG names ------------------------------ + fg_names = f.standardize_group_names(colnames(spaB_xY)) + num_fg = length(fg_names) + fg_df <- data.frame( + pool_code = 1:num_fg, + group_name = paste(sprintf("%02d", 1:num_fg), + gsub("_", " ", fg_names)) + ) + + ## Set row and column names + rownames(spaB_xY) = rownames(simB_xY) = years + colnames(spaB_xY) = colnames(simB_xY) = fg_df$group_name + + ## Add current spaB_xY reading into the list object ---- + ls_spaB_xY[[i]] <- spaB_xY + + ## Read-in Ecospace annual catch + spaC_xY <- read.csv(paste0(dir_spa, "Ecospace_Annual_Average_Catch.csv"), + skip = num_skip_spa, header = TRUE) + spaC_xY$Year = NULL + ls_spaC_xY[[i]] <- spaC_xY + } + + ## ----------------------------------------------------------------------------- + ## Prepare months and date series objects + start_y <- min(years) + end_y <- max(years) + date_series <- seq(as.Date(paste0(start_y, "-01-01")), as.Date(paste0(end_y, "-12-01")), by = "1 month") + year_series <- seq(as.Date(paste0(start_y, "-01-01")), as.Date(paste0(end_y, "-12-01")), by = "1 year") + ym_series <- format(date_series, "%Y-%m") + + ## Read in MONTHLY biomasses + #simB_xM <- read.csv(paste0(dir_sim, "biomass_monthly.csv"), skip = num_skip_sim); simB_xM$timestep.group = NULL + #simC_xM <- read.csv(paste0(dir_sim, "catch_monthly.csv"), skip = num_skip_sim); simC_xM$timestep.group = NULL + #spaB_xM <- read.csv(paste0(dir_spa, "Ecospace_Average_Biomass.csv"), skip = num_skip_spa, header = TRUE); spaB_xM$TimeStep = NULL + #rownames(spaB_xM) = rownames(simB_xM) = ym_series + + ## Read in "observed" timeseries ----------------------------------------------- + dir_obs = paste0("./", ewe_name, "/", obs_TS_name, ".csv") + obs.list = f.read_ecosim_timeseries(dir_obs, num_row_header = 4) + for(i in 1:length(obs.list)){assign(names(obs.list)[i],obs.list[[i]])} #make separate dataframe for each list element + + obsB.head <- merge(obsB.head, fg_df, by = "pool_code", all.x = TRUE) + obsC.head <- merge(obsC.head, fg_df, by = "pool_code", all.x = TRUE) + + colnames(obsB) = obsB.head$group_name + colnames(obsC) = obsC.head$group_name + + ## ----------------------------------------------------------------------------- + ## + ## OBJECTIVE COMPARISON METRICS + ## Create and run objective functions for multiple Ecospace scenarios + ## Compare fits to observed data and Ecosim results + + fit_metrics_ls = list() ## + spa_fit_sums = data.frame() + + ## Loop through each Ecospace scenario ------------------------------------------ + for(j in 1:length(spa_scenarios)){ + + fit_metrics <- data.frame( + nll_spa_obs=NA, nll_spa_sim=NA, nll_sim_obs=NA, + pbi_spa_obs=NA, pbi_spa_sim=NA, pbi_sim_obs=NA, + mae_spa_obs=NA, mae_spa_sim=NA, mae_sim_obs=NA) + + ## ----------------------------------------------------------------------------- + ## + ## Loop through every functional group to make the `fit_metrics` data frame + for(i in 1:num_fg){ + + (grp = fg_df$group_name[i]) + + ## Get biomass for individual FG: Observed, Ecosim, and Ecospace ----------- + ## Scale to the average of a given timeframe + + ## Ecospace + spaB_ls <- lapply(ls_spaB_xY, function(df) df[, i]) ## Extract the i column from each data frame in the list + spaB <- spaB_ls[[j]] ## Pull from the j'th scenario + spaB_scaled <- spaB / mean(spaB[1:init_years_toscale], na.rm = TRUE) + + ## Ecosim + simB = simB_xY[,i] + simB_scaled = simB / mean(simB[1:init_years_toscale], na.rm = TRUE) ## Ecosim scaled + + ## Observed + ## For observed, there maybe not be reference data so we check to see if observed data is available + obsB_scaled=NULL + if(i %in% obsB.head$pool_code){ + obs.idx = which(obsB.head$pool_code==i) + obs_df = suppressWarnings( ## Suppress warnings thrown when obs not available + data.frame(year_series, obsB = as.numeric(obsB[ ,obs.idx])) + ) + non_na_obsB = obs_df$obsB[!is.na(obs_df$obsB)] # Extract non-NA values from obs_df$obsB + if_else (length(non_na_obsB) < init_years_toscale, + years_to_scale <- length(non_na_obsB), + years_to_scale <- init_years_toscale) + mean_init_years = mean(non_na_obsB[1:years_to_scale]) # Calculate the mean of the first 'init_years_toscale' non-NA values + obsB_scaled = obs_df$obsB / mean_init_years # Scale the entire obs_df$obsB by this mean + } else obsB_scaled=rep(NA, length(simB)) + + ## Create data frame to compare observed, Ecosim, and Ecospace + comp_df <- data.frame(obs = obsB_scaled, sim = simB_scaled, spa = spaB_scaled) + + ## Calculate log-likelihood ------------------------------------------------ + ## Calculate log-likelihood for spa vs obs + resids <- comp_df$obs - comp_df$spa + nll_spa_obs <- -(-length(comp_df$obs)/2 * log(2*pi*var(resids, na.rm=TRUE)) - 1/(2*var(resids, na.rm=TRUE)) * sum(resids^2, na.rm=TRUE)) + + ## Calculate log-likelihood for spa vs sim + resids <- comp_df$sim - comp_df$spa + nll_spa_sim <- -(-length(comp_df$sim)/2 * log(2*pi*var(resids, na.rm=TRUE)) - 1/(2*var(resids, na.rm=TRUE)) * sum(resids^2, na.rm=TRUE)) + + ## Calculate log-likelihood for sim vs obs + resids <- comp_df$obs - comp_df$sim + nll_sim_obs <- -(-length(comp_df$obs)/2 * log(2*pi*var(resids, na.rm=TRUE)) - 1/(2*var(resids, na.rm=TRUE)) * sum(resids^2, na.rm=TRUE)) + + ## Calculate percent bias + ## Note: This measure of percent bias aggregates all prediction errors, + ## both positive and negative, into a single number. This can mask the + ## variability of the errors across different observations. + pbi_spa_obs <- 100 * (sum(comp_df$spa - comp_df$obs, na.rm=TRUE) / sum(comp_df$obs,na.rm=TRUE)) + pbi_spa_sim <- 100 * (sum(comp_df$spa - comp_df$sim, na.rm=TRUE) / sum(comp_df$sim,na.rm=TRUE)) + pbi_sim_obs <- 100 * (sum(comp_df$sim - comp_df$obs, na.rm=TRUE) / sum(comp_df$obs,na.rm=TRUE)) + + ## Calculate mean absolute error (MAE) --------------------------------------- + ## Average magnitude of errors between the predictions and observations, + ## treating all errors with equal weight regardless of their size. + mae_spa_obs = mean(abs(comp_df$spa - comp_df$obs), na.rm = TRUE) + mae_spa_sim = mean(abs(comp_df$spa - comp_df$sim), na.rm = TRUE) + mae_sim_obs = mean(abs(comp_df$sim - comp_df$obs), na.rm = TRUE) + + ## Calculate root mean absolute error (RMSE) --------------------------------- + rmse_spa_obs <- sqrt(mean((comp_df$spa - comp_df$obs)^2, na.rm = TRUE)) + rmse_spa_sim <- sqrt(mean((comp_df$spa - comp_df$sim)^2, na.rm = TRUE)) + rmse_sim_obs <- sqrt(mean((comp_df$sim - comp_df$obs)^2, na.rm = TRUE)) + + ## Store calculations + fit <- c(nll_spa_obs, nll_spa_sim, nll_sim_obs, + pbi_spa_obs, pbi_spa_sim, pbi_sim_obs, + mae_spa_obs, mae_spa_sim, mae_sim_obs); fit + + fit_metrics[i,] <- fit + } + + ## Save information + rownames(fit_metrics) = fg_df$group_name + fit_metrics = round(fit_metrics, 2) + fit_sums <- colSums(fit_metrics, na.rm = TRUE) + fit_metrics_ls[[j]] = round(fit_metrics, 2) + + spa_fit_sums <- rbind(spa_fit_sums, colSums(fit_metrics, na.rm = TRUE)) + + } + names(spa_fit_sums) <- colnames(fit_metrics) + rownames(spa_fit_sums) <- spa_scen_names + spa_fit_sums + + ## Write out tables ------------------------------------------------------------ + + ## Folder name based on `init_years_toscale` + folder_name <- paste0("Fit_metrics_scaled_", init_years_toscale, "y") + + ## Create the folder if it doesn't exist + folder_path <- paste0("./Scenario_comps/Tables/", folder_name) + if (!dir.exists(folder_path)) { + dir.create(folder_path, recursive = TRUE) + } + + ## Write out tables as CSV Files + write.csv(spa_fit_sums, file = paste0(folder_path, "/Ecospace-fits-summed.csv"), row.names = TRUE) + + for (j in 1:length(fit_metrics_ls)){ + write.csv(fit_metrics_ls[[j]], + file = paste0(folder_path, "/Fit metrics - ", spa_scen_names[j], ".csv"), + row.names = TRUE) + } + + ## Also, write out as an Excel file with each scenario as a different Tab + library(openxlsx) + wb <- createWorkbook() # Create a new workbook + + replace_NaN <- function(x) { + x[sapply(x, is.nan)] <- NA + return(x) + } + + for (j in seq_along(fit_metrics_ls)) { # Loop through the list of data frames and add each as a new sheet + sheet_name <- spa_scen_names[j] # Create a sheet name based on the names in spa_scen_names + addWorksheet(wb, sheet_name) # Add a sheet to the workbook with the data frame + writeData(wb, sheet_name, replace_NaN(fit_metrics_ls[[j]]), na.string = "NA") + } + + saveWorkbook(wb, paste0(folder_path, "/Fit_Metrics.xlsx"), overwrite = TRUE) # Save the workbook as an Excel file + + ## ----------------------------------------------------------------------------- + ## + ## Plot biomasses + ## Note: Make sure PDF readers are closed before running pdf() + + ## ----------------------------------------------------------------------------- + ## Plot and compare ANNUAL biomass + + ## Plotting parameters + col_obs = 'black' + col_sim = rgb(0.2, 0.7, .1, alpha = 0.6) ## rgb (red, green, blue, alpha) + col_spa <- c("darkgoldenrod", "indianred2", "steelblue4", "darkorchid4") + #col_spa <- adjustcolor(col_spa, alpha.f = 1) ## Adjust transparancy + + x = year_series + num_plot_pages = 4; x_break = 5; y_break = 4; x_cex = 0.9; y_cex = 0.9; x_las = 2; + sim_lty = 1; spa_lty = 1 + sim_lwd = 2; spa_lwd = 1; obs_pch = 16; obs_cex = 0.8; + main_cex = 0.85; leg_cex = 0.9; leg_pos = 'topleft';leg_inset = 0.1 + #simB_scaled = spaB_scaled_ls + + print(paste("Writing", pdf_file_name)) + pdf(pdf_file_name, onefile = TRUE) ## Set number of plots per page set.mfrow = f.get_plot_dims(x=num_fg / num_plot_pages, round2=4) par(mfrow=set.mfrow, mar=c(1, 2, 1, 2)) plots_per_pg = set.mfrow[1] * set.mfrow[2] - - for(i in 1:num_fg){ -# for(i in 1:19){ + + for(i in 1:num_fg){ + # for(i in 1:19){ grp = fg_df$group_name[i] simB = simB_xY[,i] spaB_ls <- lapply(ls_spaB_xY, function(df) df[, i]) ## Extract the i column from each data frame in the list ## Check to see if observed data is available - obsB_scaled=NULL if(i %in% obsB.head$pool_code){ obs.idx = which(obsB.head$pool_code==i) obs_df = suppressWarnings( ## Suppress warnings thrown when obs not available data.frame(year_series, obsB = as.numeric(obsB[ ,obs.idx])) ) - obsB_scaled = obs_df$obsB / mean(na.omit(obs_df$obsB)[1:init_years_toscale], na.rm = TRUE) - rm(obs_df) - rm(obs.idx) - } + non_na_obsB = obs_df$obsB[!is.na(obs_df$obsB)] # Extract non-NA values from obs_df$obsB + if_else (length(non_na_obsB) < init_years_toscale, + years_to_scale <- length(non_na_obsB), + years_to_scale <- init_years_toscale) + mean_init_years = mean(non_na_obsB[1:years_to_scale]) # Calculate the mean of the first 'init_years_toscale' non-NA values + obsB_scaled = obs_df$obsB / mean_init_years # Scale the entire obs_df$obsB by this mean + } else obsB_scaled=rep(NA, length(simB)) ## Scale to the average of a given timeframe simB_scaled = simB / mean(simB[1:init_years_toscale], na.rm = TRUE) @@ -217,13 +364,10 @@ pdf(dir_pdf_out_xY, onefile = TRUE) for(j in seq_along(spaB_scaled_ls)) { lines(x, spaB_scaled_ls[[j]], lty=spa_lty, lwd=spa_lwd, col=col_spa[j]) # Plot each Ecospace projection. Use the j-th color in the palette for each line. } - #} else if(is.list(spaB_scaled_ls)==FALSE) { # If it's not a list, but a vector, plot directly - # lines(x, spaB_scaled, lty=1, lwd=spa_lwd, col=col_spa[1]) # Plot Ecospace + #} else if(is.list(spaB_scaled_ls)==FALSE) { # If it's not a list, but a vector, plot directly + # lines(x, spaB_scaled, lty=1, lwd=spa_lwd, col=col_spa[1]) # Plot Ecospace } -} -dev.off() - - - + } + dev.off() - \ No newline at end of file +} \ No newline at end of file