Skip to content

Commit

Permalink
add some missing values and 1995 data
Browse files Browse the repository at this point in the history
  • Loading branch information
Jillian Dunic committed Jun 12, 2024
1 parent f6fd429 commit f0ec3ae
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 36 deletions.
21 changes: 11 additions & 10 deletions R/load-iphc-dat.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,29 @@
#' \itemize{
#' \item year. Year the set was hauled.
#' \item station. Permanent station ID named by IPHC.
#' \item station_key. Unique key for the set; can be used to join set table to non-Pacific halibut catch data from FISS Survey download page.
#' \item station_key. Unique key for the set; can be used to join set table to non-Pacific halibut catch data from FISS Survey download page. Occasionally there are multiple sets at the same \code{station} but this column is always unique. It's also unique across years.
#' \item species_common_name. Species name used in GFBio.
#' \item species_science_name. Scientific name used in GFBio.
#' \item number_observed. Number of individuals of a species observed. Some species such as skates and shortspine thornyheads were not enumerated to the species level in early years and so these values are `NA`.
#' \item longitude. Calculated longitude in decimal degrees of the midpoint of the set.
#' \item latitude. Calculated latitude in decimal degrees of the midpoint of the set.
#' \item number_observed. Number of hooks with the species of interest. Some species such as skates and shortspine thornyheads were not enumerated to the species level in early years and so these values are `NA`.
#' \item longitude. Longitude in decimal degrees of the midpoint of the set.
#' \item latitude. Latitude in decimal degrees of the midpoint of the set.
#' \item usable. IPHC indication of whether station was deemed effective ("Y") or ineffective ("N") for assessment.
#' \item hooks_retrieved. Number of hooks retrieved.
#' \item hooks_observed. Number of hooks observed for non-Pacific halibut species catch.
#' \item pbs_standard_grid. Stations defined as 'standard' in `gfiphc`.
#' \item inside_wcvi. Is station in inside West Coast Vancouver Island waters. To date, these additional stations in inside waters were only sampled in 2018.
#' \item inside_wcvi. Logical: is the station in inside West Coast Vancouver Island waters (sampled in 2018 only) rather than anywhere else? You may want to exclude these stations from spatiotemporal modelling.
#' \item sample_type. Type of observations.
#' * "20 hooks" - Observation of the first 20 (non-halibut) hooks of each skate.
#' * "all hooks" - All hooks observed.
#'
#' \item depth_m. Average of beginning and end depth of set in metres.
#' \item temp_c. Temperature at profiler max pressure (degrees Celsius).
#' \item soak_time_min. Time interval gear was in water (minutes).
#' \item avg_no_hook_per_skate. Average number of hooks per skate at setting.
#' \item no_skates_hauled. Number of skates hauled (no metadata on IPHC website).
#' \item no_skates_set. Number of skates set, not adjusted for baits or average number of hooks per skate.
#' \item effective_skates. "An effective skate is 100 baited hooks. The average number of hooks/skate and the number of missing baits at setting factor into the effective skate calculation". Description from IPHC table: "data field names defined"
#' \item baited_hooks. The number of baited hooks remaining. These are unavailable (NA) for some Pacific halibut records where the sample_type = '20 hooks'.
#' \item baits_returned. The number of hooks returned with bait still on them. These are unavailable (`NA`) for Pacific halibut records where the sample_type = '20 hooks'.
#'}
#'
#' The tibble also contains the following attributes:
Expand All @@ -45,17 +46,17 @@
#' @examples
#' \dontrun{
#' # Retrieve and process the IPHC data
#' iphc_data <- get_iphc_dat()
#' iphc_data <- load_iphc_dat()
#' }
#'
#' @export
load_iphc_dat <- function() {
iphc_dat <- dplyr::inner_join(gfdata::iphc_catch, gfdata::iphc_sets,
by = c("year", "station", "station_key")) |>
dplyr::mutate(baited_hooks = dplyr::case_when(
.data$species_common_name == "pacific halibut" & .data$sample_type == "all hooks" ~ .data$baited_hooks,
dplyr::mutate(baits_returned = dplyr::case_when(
.data$species_common_name == "pacific halibut" & .data$sample_type == "all hooks" ~ .data$baits_returned,
.data$species_common_name == "pacific halibut" & .data$sample_type == "20 hooks" ~ NA, # we do not have the data to know returned baited hooks for halibut catch
.data$species_common_name != "pacific halibut" ~ .data$baited_hooks
.data$species_common_name != "pacific halibut" ~ .data$baits_returned
)) |>
dplyr::mutate(hooks_observed = dplyr::case_when(
.data$species_common_name == "pacific halibut" & .data$sample_type == "all hooks" ~ .data$hooks_observed,
Expand Down
31 changes: 26 additions & 5 deletions data-raw/iphc-compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,15 +197,14 @@ test <- bind_rows(
iphc |>
mutate(source = 'raw', station = as.character((station))) |>
filter(!(year == 2019 & station %in% c("2099", "2107"))),
distinct(iphc, year, station, baited_hooks) |> # compare baited hook counts
drop_na(baited_hooks) |>
pivot_longer(cols = baited_hooks, names_to = "species_common_name", values_to = "number_observed") |>
distinct(iphc, year, station, baits_returned) |> # compare baited hook counts
drop_na(baits_returned) |>
pivot_longer(cols = baits_returned, names_to = "species_common_name", values_to = "number_observed") |>
mutate(source = 'raw', station = as.character((station))) |>
mutate(species_common_name = '_hook with bait')
)
not_zero <- test |>
filter(source == "gfiphc") |>
filter(year > 1995) |>
filter(standard == "Y") |>
group_by(species_common_name, year) |>
summarise(count = sum(number_observed, na.rm = TRUE)) |>
Expand All @@ -216,7 +215,7 @@ not_zero <- test |>

test2 <- test |>
filter(species_common_name %in% not_zero) |>
filter(station %in% test_stations) |>
#filter(station %in% test_stations) |>
group_by(source, species_common_name, year) |>
summarise(count = sum(number_observed, na.rm = TRUE))
ggplot(data = test2, aes(x = year, y = count, colour = source)) +
Expand All @@ -229,3 +228,25 @@ t2 <- filter(old2, species_common_name == "darkblotched rockfish", N_it > 0) |>
select(species_science_name, year, station, number_observed)

left_join(t2, filter(test, source == "raw")) |> glimpse()


t1 <- filter(test, species_common_name == "north pacific spiny dogfish") |>
#filter(effective_skates == 0) |>
filter(source == "gfiphc", year == 1996)

t2 <- filter(test, species_common_name == "north pacific spiny dogfish") |>
filter(effective_skates == 0)


hooks_per_effective_skate <- sum(xx2$hooks_observed) / sum(xx1$E_it20)

t2 |> select(year, station, species_common_name:no_skates_set) |> glimpse()

temp <- left_join(
t2 |> select(year, station, species_common_name:no_skates_set) |>
mutate(check_eff_skate = hooks_observed / avg_no_hook_per_skate),
test |> filter(source == "gfiphc")
)

glimpse(temp)

68 changes: 48 additions & 20 deletions data-raw/iphc-simple.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ d$scientific_name[d$scientific_name == "delolepis gigantia"] <- "cryptacanthodes
d$scientific_name[d$scientific_name == "eopsetta exilis"] <- "lyopsetta exilis" # slender sole

# baited hook count is used for Watson et al. 2023 censoring approach
baited_hooks <- set |>
baits_returned <- set |>
distinct(year, stlkey, station) |>
left_join(
d |>
filter(species_name == "hook with bait") |>
select(stlkey, number_observed)
) |>
mutate(baited_hooks = if_else(is.na(number_observed), 0, number_observed)) |>
mutate(baits_returned = if_else(is.na(number_observed), 0, number_observed)) |>
left_join(distinct(d, year, sampletype)) |>
rename(baited_hook_sampletype = "sampletype") |> # need this info to make decisions for halibut catch
select(-number_observed)
Expand All @@ -92,7 +92,7 @@ hal_count_dat <- set |>
# mutate(hooksfished = avg_no_hook_per_skate * no_skates_hauled) # except for sets where no non-halibut were captured...

d <- bind_rows(d, hal_count_dat) |>
left_join(baited_hooks)
left_join(baits_returned)

# Additional species clean up --------------------------------------------------
# --- Rougheye/blackspotted complex ---
Expand All @@ -110,7 +110,7 @@ re_bs <- filter(d, scientific_name %in% c("sebastes aleutianus", "sebastes melan
group_by(year, stlkey, station, setno, scientific_name,
sampletype, hooksfished, hooksretrieved, hooksobserved,
no_skates_set, no_skates_hauled, avg_no_hook_per_skate,
effective_skates_hauled, baited_hooks, baited_hook_sampletype) |>
effective_skates_hauled, baits_returned, baited_hook_sampletype) |>
summarise(number_observed = sum(number_observed)
) |> ungroup()
# Replace rougheye/blackspotted rows with the summed counts of rougheye and blackspotted
Expand Down Expand Up @@ -164,7 +164,7 @@ count_dat <- transmute(count_dat,
# set_number = as.integer(setno), # excluding for now
station = as.integer(station),
station_key = as.integer(stlkey),
baited_hooks = as.integer(baited_hooks)
baits_returned = as.integer(baits_returned)
)

# need to collapse bering skate and sandpaper skate into one species:
Expand All @@ -176,7 +176,7 @@ count_dat <- count_dat |>
hooks_retrieved = as.integer(hooks_retrieved),
hooks_observed = sum(hooks_observed),
number_observed = sum(number_observed),
baited_hooks = sum(baited_hooks),
baits_returned = sum(baits_returned),
species_common_name = species_common_name[1], # pick one for now; the DFO ones get joined anyways
.groups = "drop"
)
Expand Down Expand Up @@ -227,7 +227,10 @@ filter(dat_test, year == 2012) |> select(avg_no_hook_per_skate, no_skates_hauled
# so, let's leave them as is in the data according to hooks observed...

# need this to be a right join because very occasionally a set has no non-halibut catch!
dat <- right_join(count_dat, set_dat, by = join_by(year, station, station_key))
dat <- right_join(count_dat, set_dat, by = join_by(year, station, station_key)) |>
# Let's add the effective skates in because they are still used for the offset calculation
# For the 2012 sets where effective skates are missing (recorded as 0), we'll use the approximation: hooks_observed / avg_no_hook_per_skate
mutate(effective_skates = ifelse((year == 2012 & effective_skates == 0), hooks_observed / avg_no_hook_per_skate, effective_skates))
#dat <- left_join(count_dat, set_dat, by = join_by(year, station, station_key))

# need to fill in the zeros -------------------------------------------------
Expand Down Expand Up @@ -260,7 +263,7 @@ full <- select(
dat, year, station, station_key, longitude, latitude,
hooks_observed, hooks_fished, hooks_retrieved,
avg_no_hook_per_skate, no_skates_hauled, no_skates_set, effective_skates,
sample_type, usable, soak_time_min, temp_c, depth_m, baited_hooks
sample_type, usable, soak_time_min, temp_c, depth_m, baits_returned
) |> distinct()
full <- purrr::map_dfr(
sort(unique(count_dat$species_science_name)),
Expand Down Expand Up @@ -408,13 +411,29 @@ filter(dat_pbs, year == 2018, inside_wcvi) |> plot_map(pbs_standard_grid)
dat_pbs |> plot_map(paste(pbs_standard_grid, inside_wcvi))

# bring in pre 1998 data from gfiphc ----------------------------------------
# Need to consider if/how we can include 1995 data
#data1995 <- left_join(gfiphc::setData1995rs, gfiphc::countData1995) # No data for hook counts in 1995

old <- gfiphc::data1996to2002 |> filter(year < 1998) # |> filter(usable == "Y")

# Need to count all observations per station to get hooks observed for 1995
dat1995 <- left_join(gfiphc::setData1995, gfiphc::countData1995) |> # No data for hook counts in 1995
tidyr::drop_na(specCount) # there are some stations with no species observed
ho1995 <- dat1995 |>
group_by(year, station, lat, lon) |>
summarise(hooksObserved = sum(specCount))
dat1995 <- left_join(dat1995, ho1995) |>
rename(E_it = "effSkate", catchCount = "specCount") |>
select(year, station, lon, lat, spNameIPHC, catchCount, hooksObserved, usable, E_it)

dat1996to2002 <- gfiphc::data1996to2002 |>
filter(year < 1998) |># |> filter(usable == "Y")
mutate(station = as.character(station))

old <- bind_rows(dat1995, dat1996to2002)
old_br <- filter(old,spNameIPHC == "Hook with Bait") |>
rename(baits_returned = "catchCount") |>
select(year, station, lon, lat, baits_returned)
old <- left_join(old, old_br) |>
mutate(baits_returned = ifelse(is.na(baits_returned), 0, baits_returned))
# add sample type - see Table 2 in Anderson et al. 2022 (Data synopsis 2021)
old$sample_type <- NA
old$sample_type[old$year == 1995] <- "all hooks"
old$sample_type[old$year == 1996] <- "all hooks"
old$sample_type[old$year == 1997] <- "20 hooks"

Expand All @@ -423,15 +442,22 @@ stopifnot(sum(old$station %in% dat_pbs$station) == 0L)

glimpse(dat_pbs)

old <- select(old, year, station, longitude = lon, latitude = lat, depth_m = depthAvge, species_common_name = spNameIPHC, number_observed = catchCount, hooks_observed = hooksObserved, usable, no_skates_hauled = skates, effective_skates = E_it)
old <- select(old, year, station, longitude = lon, latitude = lat,
depth_m = depthAvge, species_common_name = spNameIPHC,
number_observed = catchCount, hooks_observed = hooksObserved, usable,
no_skates_hauled = skates, effective_skates = E_it,
baits_returned)
old <- mutate(old, species_common_name = tolower(species_common_name))

old_sp <- sort(unique(old$species_common_name))
old_sp[!old_sp %in% old$species_common_name]

# most can be dropped, but fix spiny dogfish and sixgill shark
# most can be dropped, but fix others:
old$species_common_name[old$species_common_name == "spiny dogfish"] <- "north pacific spiny dogfish"
old$species_common_name[old$species_common_name == "sixgill shark"] <- "bluntnose sixgill shark"
old$species_common_name[old$species_common_name == "rock sole"] <- "southern rock sole"
old$species_common_name[old$species_common_name == "rougheye rockfish"] <- "rougheye/blackspotted rockfish complex"
old$species_common_name[old$species_common_name == "sablefish (blackcod)"] <- "sablefish"

# need to pad in zeros to old data:
full_old <- select(
Expand All @@ -451,12 +477,16 @@ old <- inner_join(old, select(spp, species_common_name, species_science_name),

# make a fake `station_key` to match modern data:
old <- mutate(old,
station_key = as.integer(paste0(year, station)),
station_key = as.character(paste0(year, station)),
inside_wcvi = FALSE,
pbs_standard_grid = TRUE # confirmed to match gfiphc decisions
)

dat_all <- bind_rows(old, dat_pbs) |>
dat_all <- bind_rows(
old,
dat_pbs |>
mutate(station_key = as.character(station_key),
station = as.character(station))) |>
arrange(year, species_common_name, station)

# fix some known problems!
Expand Down Expand Up @@ -513,8 +543,6 @@ dat_all <-
))

# save it! ------------------------------------------------------------------


# # order nicely:
# iphc <- select(
# dat_all,
Expand Down Expand Up @@ -566,7 +594,7 @@ iphc_sets <- select(
no_skates_hauled,
no_skates_set,
effective_skates,
baited_hooks
baits_returned
) |>
distinct()

Expand Down
Binary file modified data/iphc_catch.rda
Binary file not shown.
Binary file modified data/iphc_sets.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/iphc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions man/load_iphc_dat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f0ec3ae

Please sign in to comment.