resolve RatioBiasEM.R merge conflict

Merge branch 'merge_catch_bias' of https://github.com/nmfs-fish-tools/SSMSE into merge_catch_bias # Conflicts: # R/RatioBiasEM.R
nmfs-fish-tools · Jun 28, 2024 · c49aa49 · c49aa49
2 parents 92c751b + 5c549b4
commit c49aa49
Show file tree

Hide file tree

Showing 6 changed files with 94 additions and 19 deletions.
diff --git a/R/MS_functions.R b/R/MS_functions.R
@@ -91,6 +91,16 @@ EM <- function(EM_out_dir = NULL, init_loop = TRUE, OM_dat, verbose = FALSE,
       new_datfile_name = new_datfile_name,
       verbose = verbose
     )
+
+    #Increment main recruitment phase end year by number of assessment years
+    #So the model can continue to estimate rec devs 
+    ctl <- SS_readctl(file.path(EM_out_dir, start[["ctlfile"]]),
+                      datlist = new_EM_dat
+    )
+    ctl$MainRdevYrLast <- ctl$MainRdevYrLast + nyrs_assess
+    r4ss::SS_writectl(ctl, file.path(EM_out_dir, start[["ctlfile"]]),
+                      overwrite = TRUE
+    )
   }
   # Update SS random seed
   start <- SS_readstarter(file.path(EM_out_dir, "starter.ss"),
@@ -267,6 +277,11 @@ get_EM_catch_df <- function(EM_dir, dat) {
   catch_df <- as.data.frame(catch_df)
   catch_bio_df <- as.data.frame(catch_bio_df)
   catch_F_df <- as.data.frame(catch_F_df)
+
+  #sort data frames by year, season, fleet
+  catch_df <- catch_df[order(abs(catch_df[["fleet"]]),abs(catch_df[["year"]]),abs(catch_df[["seas"]])),]
+  catch_bio_df <- catch_bio_df[order(abs(catch_bio_df[["fleet"]]),abs(catch_bio_df[["year"]]),abs(catch_bio_df[["seas"]])),]
+  catch_F_df <- catch_F_df[order(abs(catch_F_df[["fleet"]]),abs(catch_F_df[["year"]]),abs(catch_F_df[["seas"]])),]
   # get discard, if necessary
   if (dat[["N_discard_fleets"]] > 0) {
     # discard units: 1, biomass/number according to set in catch
@@ -331,6 +346,16 @@ get_EM_catch_df <- function(EM_dir, dat) {
       }
     }
     dis_df <- do.call("rbind", dis_df_list)
+    dis_df <- dis_df %>%
+      dplyr::group_by(.data[["Yr"]], .data[["Seas"]], .data[["Flt"]]) %>%
+      dplyr::summarise(Discard = sum(.data[["Discard"]])) %>%
+      merge(se_dis, all.x = TRUE, all.y = FALSE) %>%
+      dplyr::ungroup() %>%
+      dplyr::select(.data[["Yr"]], .data[["Seas"]], .data[["Flt"]], .data[["Discard"]], .data[["Std_in"]])
+    dis_df <- as.data.frame(dis_df)
+    #sort data frame by year, season, fleet
+    dis_df <- dis_df[order(abs(dis_df[["Flt"]]),abs(dis_df[["Yr"]]),abs(dis_df[["Seas"]])),]
+
   } else {
     dis_df <- NULL
   }

diff --git a/R/RatioBiasEM.R b/R/RatioBiasEM.R
@@ -268,6 +268,13 @@ get_RatioEM_catch_df<-function(EM_dir, dat, dat_yrs,
       }
     }
     dis_df <- do.call("rbind", dis_df_list)
+    dis_df <- dis_df %>%
+      dplyr::group_by(.data[["Yr"]], .data[["Seas"]], .data[["Flt"]]) %>%
+      dplyr::summarise(Discard = sum(.data[["Discard"]])) %>%
+      merge(se_dis, all.x = TRUE, all.y = FALSE) %>%
+      dplyr::ungroup() %>%
+      dplyr::select(.data[["Yr"]], .data[["Seas"]], .data[["Flt"]], .data[["Discard"]], .data[["Std_in"]])
+    dis_df <- as.data.frame(dis_df)
   } else {
     dis_df <- NULL
   }
@@ -293,7 +300,7 @@ get_RatioEM_catch_df<-function(EM_dir, dat, dat_yrs,
 #'  init_loop is TRUE.
 #' @template OM_out_dir
 #' @template sample_struct
-#' @template seed
+#' @template seed 
 #' @param ... Any additional parameters
 
 #For example, if EM has a positive bias (e.g., EM catch = 1 when true OM catch = 0.5), the EM2OM multiplier should be less than 1 (EM2OM = 0.5)
@@ -437,15 +444,16 @@ RatioBiasEM <- function(EM_out_dir = NULL, init_loop = TRUE, OM_dat, verbose = F
     tmp_discards <- base::merge(new_OM_catch_list$discards, sample_struct$EM2OMdiscard_bias, all.x=TRUE)
     new_OM_catch_list$discards$Discard <- new_OM_catch_list$discards$Discard * tmp_discards$bias 
   }
-
   if(2 %in% OM_dat$fleetinfo$type){ # if bycatch fleet -- remove from catch list and keep only bycatch fleets in catch_F list
+
     byc_f <- which(OM_dat$fleetinfo$type==2)#as.numeric(row.names(OM_dat$fleetinfo[which(OM_dat$fleetinfo$type==2),]))
     new_OM_catch_list$catch<- new_OM_catch_list$catch[which(!is.element(new_OM_catch_list$catch$fleet,byc_f)),]
     new_OM_catch_list$catch_bio<- new_OM_catch_list$catch_bio[which(!is.element(new_OM_catch_list$catch$fleet,byc_f)),]
     new_OM_catch_list$catch_F<- new_OM_catch_list$catch_F[which(is.element(new_OM_catch_list$catch_F$fleet,byc_f)),]
     # new_OM_catch_list$catch<- new_OM_catch_list$catch[new_OM_catch_list$catch$fleet!=byc_f,]
     # new_OM_catch_list$catch_bio<- new_OM_catch_list$catch_bio[new_OM_catch_list$catch$fleet!=byc_f,]
     # new_OM_catch_list$catch_F<- new_OM_catch_list$catch_F[new_OM_catch_list$catch_F$fleet==byc_f,]
+
   } else{
     new_OM_catch_list$catch_F <- NULL
   }
@@ -522,14 +530,17 @@ add_new_dat_BIAS<- function (OM_dat, EM_datfile, sample_struct, EM_dir, nyrs_ass
   SIMPLIFY = FALSE, USE.NAMES = TRUE)
 
   #extracted_dat takes observations from the OM; then below we use the EM2OM multiplier to put catch back into EM units and remove the EM2OMcatch_basis element so that it doesn't get added to the datafile. 
-
+  extracted_dat$catch <- extracted_dat$catch[order(abs(extracted_dat$catch$fleet),abs(extracted_dat$catch$year),abs(extracted_dat$catch$seas)),]
   if(!is.null(extracted_dat$catch)){
     tmp_catch<- merge(extracted_dat$catch, sample_struct$EM2OMcatch_bias)
+    tmp_catch <- tmp_catch[order(abs(tmp_catch$fleet),abs(tmp_catch$year),abs(tmp_catch$seas)),]
     extracted_dat$catch$catch<- tmp_catch$catch / tmp_catch$bias ### EM2OM edits to get EM catch back to OM
   }
 
+  extracted_dat$discard_data <- extracted_dat$discard_data[order(abs(extracted_dat$discard_data$Flt),abs(extracted_dat$discard_data$Yr),abs(extracted_dat$discard_data$Seas)),]
   if(!is.null(extracted_dat$discard_data)){
     tmp_discard<- merge(extracted_dat$discard_data, sample_struct$EM2OMdiscard_bias)
+    tmp_discard <- tmp_discard[order(abs(tmp_discard$Flt),abs(tmp_discard$Yr),abs(tmp_discard$Seas)),]
     extracted_dat$discard_data$Discard<- tmp_discard$Discard / tmp_discard$bias
   }
 
@@ -655,6 +666,15 @@ BiasEM <- function(EM_out_dir = NULL, init_loop = TRUE, OM_dat, verbose = FALSE,
       verbose = verbose
     )
 
+    #Increment main recruitment phase end year by number of assessment years
+    #So the model can continue to estimate rec devs 
+    ctl <- SS_readctl(file.path(EM_out_dir, start[["ctlfile"]]),
+                      datlist = new_EM_dat
+    )
+    ctl$MainRdevYrLast <- ctl$MainRdevYrLast + nyrs_assess
+    r4ss::SS_writectl(ctl, file.path(EM_out_dir, start[["ctlfile"]]),
+                      overwrite = TRUE
+    )
   } # end else not first iteration
 
   # Update SS random seed
@@ -699,19 +719,24 @@ BiasEM <- function(EM_out_dir = NULL, init_loop = TRUE, OM_dat, verbose = FALSE,
   # For the simple approach, we can just apply a series of scalars from the EM catch list to create an OM catch list
   new_OM_catch_list = new_EM_catch_list
 
-
+  sample_struct$EM2OMcatch_bias <- sample_struct$EM2OMcatch_bias[order(abs(sample_struct$EM2OMcatch_bias$fleet),abs(sample_struct$EM2OMcatch_bias$year),abs(sample_struct$EM2OMcatch_bias$seas)),]
+  sample_struct$EM2OMcatch_bias <- sample_struct$EM2OMcatch_bias[!duplicated(sample_struct$EM2OMcatch_bias),]
   if(!is.null(new_OM_catch_list$catch)){
-    tmp_catch <- base::merge(new_OM_catch_list$catch, sample_struct$EM2OMcatch_bias, all.x=TRUE, all.y=FALSE)
+    tmp_catch <- base::merge(abs(new_OM_catch_list$catch), abs(sample_struct$EM2OMcatch_bias), all.x=TRUE, all.y=FALSE)
+    tmp_catch <- tmp_catch[order(abs(tmp_catch$fleet),abs(tmp_catch$year),abs(tmp_catch$seas)),]
     new_OM_catch_list$catch$catch <- new_OM_catch_list$catch$catch * tmp_catch$bias 
   }
   if(!is.null(new_OM_catch_list$catch_bio)){
-    tmp_catch_bio <- base::merge(new_OM_catch_list$catch_bio, sample_struct$EM2OMcatch_bias, all.x=TRUE)
+    tmp_catch_bio <- base::merge(abs(new_OM_catch_list$catch_bio), abs(sample_struct$EM2OMcatch_bias), all.x=TRUE)
+    tmp_catch_bio <- tmp_catch_bio[order(abs(tmp_catch_bio$fleet),abs(tmp_catch_bio$year),abs(tmp_catch_bio$seas)),]
     new_OM_catch_list$catch_bio$catch <- new_OM_catch_list$catch_bio$catch * tmp_catch_bio$bias
   }
   # new_OM_catch_list$catch_bio <- NULL
-
+  sample_struct$EM2OMdiscard_bias <- sample_struct$EM2OMdiscard_bias[order(abs(sample_struct$EM2OMdiscard_bias$Flt),abs(sample_struct$EM2OMdiscard_bias$Yr),abs(sample_struct$EM2OMdiscard_bias$Seas)),]
+  sample_struct$EM2OMdiscard_bias <- sample_struct$EM2OMdiscard_bias[!duplicated(sample_struct$EM2OMdiscard_bias),]
   if(!is.null(new_OM_catch_list$discards)){
-    tmp_discards <- base::merge(new_OM_catch_list$discards, sample_struct$EM2OMdiscard_bias, all.x=TRUE)
+    tmp_discards <- base::merge(abs(new_OM_catch_list$discards), abs(sample_struct$EM2OMdiscard_bias), all.x=TRUE) #need to sort to figure this out
+    tmp_discards <- tmp_discards[order(abs(tmp_discards$Flt),abs(tmp_discards$Yr),abs(tmp_discards$Seas)),]
     new_OM_catch_list$discards$Discard <- new_OM_catch_list$discards$Discard * tmp_discards$bias 
   }
 

diff --git a/R/initOM.R b/R/initOM.R
@@ -471,7 +471,7 @@ run_OM <- function(OM_dir,
   if (is.null(seed)) {
     seed <- stats::runif(1, 1, 9999999)
   }
-
+  
   start <- r4ss::SS_readstarter(file.path(OM_dir, "starter.ss"),
     verbose = FALSE
   )

diff --git a/R/manipulate_EM.R b/R/manipulate_EM.R
@@ -66,13 +66,37 @@ get_EM_dat <- function(OM_dat, EM_dat, do_checks = TRUE) {
     check_OM_dat(OM_dat, EM_dat)
   }
   dat <- list(OM_dat = OM_dat, EM_dat = EM_dat)
+
+  if (!is.null(OM_dat[["catch"]])) {
+  Catches <- lapply(dat, function(x) {
+    tmp <- combine_cols(x, "catch", c("year", "seas", "fleet"))
+  })
+  # match 1 way: match each EM obs with an OM obs. extract only these OM obs.
+  matches <- which(Catches[[1]][, "combo"] %in% Catches[[2]][, "combo"])
+  # extract only the rows of interest and get rid of the "combo" column
+  new_dat[["catch"]] <- Catches[[1]][matches, -ncol(Catches[[1]])]
+  }
+
+  if (!is.null(OM_dat[["discard_data"]])) {
+    Discards <- lapply(dat, function(x) {
+      tmp <- combine_cols(x, "discard_data", c("Yr", "Seas", "Flt"))
+    })
+    # match 1 way: match each EM obs with an OM obs. extract only these OM obs.
+    matches <- which(Discards[[1]][, "combo"] %in% Discards[[2]][, "combo"])
+    # extract only the rows of interest and get rid of the "combo" column
+    new_dat[["discard_data"]] <- Discards[[1]][matches, -ncol(Discards[[1]])]
+  }
+
+  if (!is.null(OM_dat[["CPUE"]])) {
   CPUEs <- lapply(dat, function(x) {
     tmp <- combine_cols(x, "CPUE", c("year", "seas", "index"))
   })
   # match 1 way: match each EM obs with an OM obs. extract only these OM obs.
   matches <- which(CPUEs[[1]][, "combo"] %in% CPUEs[[2]][, "combo"])
   # extract only the rows of interest and get rid of the "combo" column
   new_dat[["CPUE"]] <- CPUEs[[1]][matches, -ncol(CPUEs[[1]])]
+  }
+
   # add in lcomps
   if (OM_dat[["use_lencomp"]] == 1) {
     lcomps <- lapply(dat, function(x) {
@@ -85,7 +109,7 @@ get_EM_dat <- function(OM_dat, EM_dat, do_checks = TRUE) {
     new_dat[["lencomp"]] <- lcomps[[1]][matches_l, -ncol(lcomps[[1]])]
   }
   # add in age comps
-  if (!is.null(dat[["agecomp"]])) {
+  if (!is.null(OM_dat[["agecomp"]])) {
     acomps <- lapply(dat, function(x) {
       tmp <- combine_cols(
         x, "agecomp",
@@ -97,7 +121,7 @@ get_EM_dat <- function(OM_dat, EM_dat, do_checks = TRUE) {
   }
   # TODO: check this for other types of data, esp. mean size at age, k
   # and mean size.
-  if (!is.null(dat[["meanbodywt"]])) {
+  if (!is.null(OM_dat[["meanbodywt"]])) {
     meansize <- lapply(dat, function(x) {
       tmp <- combine_cols(
         x, "meanbodywt",
@@ -107,11 +131,12 @@ get_EM_dat <- function(OM_dat, EM_dat, do_checks = TRUE) {
     matches_meansize <- which(meansize[[1]][, "combo"] %in% meansize[[2]][, "combo"])
     new_dat[["meanbodywt"]] <- meansize[[1]][matches_meansize, -ncol(meansize[[1]])]
   }
-  if (!is.null(dat[["MeanSize_at_Age_obs"]])) {
+
+  if (!is.null(OM_dat[["MeanSize_at_Age_obs"]])) {
     size_at_age <- lapply(dat, function(x) {
       tmp <- combine_cols(
         x, "MeanSize_at_Age_obs",
-        c("Yr", "Seas", "FltSvy", "Gender", "Part", "Ageerr")
+        c("Yr", "Seas", "FltSvy", "Gender", "Part", "AgeErr")
       )
     })
     matches_size_at_age <- which(size_at_age[[1]][, "combo"] %in% size_at_age[[2]][, "combo"])

diff --git a/R/parse_MS.R b/R/parse_MS.R
@@ -68,14 +68,12 @@ parse_MS <- function(MS, EM_out_dir = NULL, EM_init_dir = NULL,
   )
 
   new_catch_list <- do.call(MS, args = pars_list)
-  
+
   # to do: need better checks on function name? Maybe be more explicit on
   # which environment the function is in?
   # check output before returning
   check_catch_df(new_catch_list[["catch"]])
-  if (isTRUE(!is.null(new_catch_list[["discards"]]))) {
-    warning("Discards are not added into the OM for SSMSE currently.")
-  }
+
   new_catch_list
 }
 
diff --git a/R/runSSMSE.R b/R/runSSMSE.R
@@ -96,7 +96,7 @@ run_SSMSE <- function(scen_name_vec,
   if (!is.null(custom_MS_source)) {
     source(custom_MS_source)
   }
-  
+
   # input checks
   if (!all(MS_vec %in% c("EM", "no_catch", "Interim"))) {
     invalid_MS <- MS_vec[unlist(lapply(MS_vec, function(x) !exists(x)))]
@@ -170,7 +170,6 @@ run_SSMSE <- function(scen_name_vec,
     scen_list[[i]][["scen_seed"]] <- scen_seed
   }
 
-
   if (run_parallel) {
     if (!is.null(n_cores)) {
       n_cores <- min(max(n_cores, 1), (parallel::detectCores() - 1))
@@ -612,6 +611,7 @@ run_SSMSE_iter <- function(out_dir = NULL,
       verbose = verbose, init_run = TRUE, seed = (iter_seed[["iter"]][1] + 12345)
     )
   }
+
   if (use_SS_boot == FALSE) {
     stop(
       "Currently, only sampling can be done using the bootstrapping ",
@@ -649,6 +649,7 @@ run_SSMSE_iter <- function(out_dir = NULL,
     seed = (iter_seed[["iter"]][1] + 123456),
     sample_struct = sample_struct # add for bias
   )
+
   message(
     "Finished getting catch (years ",
     min(new_catch_list[["catch"]][, "year"]), " to ", max(new_catch_list[["catch"]][, "year"]),
@@ -732,6 +733,7 @@ run_SSMSE_iter <- function(out_dir = NULL,
       "Finished running and sampling OM through year ", max(new_catch_list[["catch"]][, "year"]),
       "."
     )
+
     if (run_EM_last_yr == FALSE && isTRUE(yr == test_run_EM_yr)) {
       skip_EM_run <- TRUE
     } else {