diff --git a/DESCRIPTION b/DESCRIPTION index 52ca29f..ada9b7c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,12 +25,13 @@ Imports: tidyr, rlang RoxygenNote: 7.3.1 -Suggests: +Suggests: usethis, testthat, knitr, rmarkdown, here, + ggplot2, sf VignetteBuilder: knitr Roxygen: list(markdown = TRUE) diff --git a/R/data.R b/R/data.R index 14974c7..ed4f80d 100644 --- a/R/data.R +++ b/R/data.R @@ -68,9 +68,21 @@ #' Active survey blocks for DFO Pacific groundfish surveys. #' Obtained via `gfdata::get_active_survey_blocks()` with some cleaning #' as documented in `data-raw/survey_blocks.R`. -"survey_blocks" +#' +#' @format Simple feature (`sf`) collection with 66293 features and 6 fields: +#' \describe{ +#' \item{survey_abbrev}{Survey abbreviation.} +#' \item{survey_series_id}{Unique identifier for the survey series.} +#' \item{station_key}{Unique identifier for each grid cell} +#' \item{depth_m}{Depth in metres.} +#' \item{active_block}{Is block actively fished as of date downloaded: +#' e.g., `attr(gfdata::survey_blocks, "date-downloaded")`)} +#' \item{geometry}{Represents grid cell.} +#' \item{area}{Overwater area in km^2.} +#'} #' #' @examplesIf requireNamespace("sf", quietly = TRUE) +#' requireNamespace("ggplot2", quietly = TRUE) #' library(sf) #' library(ggplot2) #' gfdata::survey_blocks |> @@ -79,3 +91,6 @@ #' theme_minimal() + #' scale_colour_brewer(palette = "Dark2") #' attr(gfdata::survey_blocks, "date-generated") +#' attr(gfdata::survey_blocks, "date-downloaded") +#' +"survey_blocks" \ No newline at end of file diff --git a/data-raw/survey_blocks.R b/data-raw/survey_blocks.R index 5f0993d..251ae56 100644 --- a/data-raw/survey_blocks.R +++ b/data-raw/survey_blocks.R @@ -6,7 +6,9 @@ library(dplyr) # d <- gfdata::get_active_survey_blocks(active_only = FALSE) # saveRDS(d, "~/Downloads/survey-blocks.rds") -d <- readRDS("~/Downloads/survey-blocks2.rds") +d <- readRDS("~/Downloads/survey-blocks.rds") + +download_date <- lubridate::date(file.info("~/Downloads/survey-blocks.rds")$ctime) d$id <- seq_len(nrow(d)) polys <- split(d, d$id) |> lapply(\(x) { @@ -75,8 +77,144 @@ df2 |> filter(active_block) |> # usethis::use_data_raw("survey_blocks") # mapview::mapview(df) -survey_blocks <- filter(df2, active_block) |> mutate(depth_m = -depth_m) - +# Get overwater area for HBLL grids +# --------------------------------- +# The documentation of this coastline shapefile could be better, +# But at least this is a polygon rather than the freshwater atlas and the CHS coastline +# https://open.canada.ca/data/dataset/30aeb5c1-4285-46c8-b60b-15b1a6f4258b +# https://catalogue.data.gov.bc.ca/dataset/province-of-british-columbia-boundary-terrestrial +# "Province of British Columbia - Boundary Terrestrial PUBLISHED +# Published By GeoBC Branch +# Description +# This dataset consists of polyline, single part, and multi part polygons representing the Province of British Columbia. +# The terrestrial portion of the boundary was derived from the Administrative Boundaries Management System (ABMS) representation of the province: Province of British Columbia - Legally Defined Administrative Areas of BC. +# The coastal portion of the boundary differs from the ABMS boundary and was derived from the Freshwater Atlas (FWA): Freshwater Atlas Coastlines. +# This boundary may be updated periodically, as more accurate data becomes available. +# Due to the structure of the data, it does not meet the technical requirements to be published in the BC Geographic Warehouse (BCGW). It is available for download as an OGC GeoPackage and as an ESRI File Geodatabase in the Data and Resources box to the the right." +# --- +st_layers("~/Downloads/BC_Boundary_Terrestrial.gpkg") # get layer name of single polygon +bc_poly <- st_read("~/Downloads/BC_Boundary_Terrestrial.gpkg", layer = "BC_Boundary_Terrestrial_Multipart") + +# HBLL INS ---- +llin <- df2 |> filter(survey_abbrev %in% c("HBLL INS N", "HBLL INS S")) +llin_bbox <- st_bbox(llin) +ll_crs <- st_crs(llin) + +small_bc_ins <- bc_poly |> + st_transform(ll_crs) |> + st_crop(llin_bbox) + +overlap_ins <- st_intersection(small_bc_ins, llin) +llin_ow <- st_difference(llin, small_bc_ins) %>% + mutate(area = st_area(., units = "km^2")) + +# Looks pretty good +ggplot() + + geom_sf(data = llin, fill = "blue") + + geom_sf(data = small_bc_ins) + + geom_sf(data = overlap_ins, fill = "red") + + geom_sf(data = llin_ow, fill = "black") + +# Compare with old overwater df: +overwater_df <- readRDS('~/R_DFO/gfsynopsis/report/data-cache-2024-05/grids/hbll_ins_grid.rds') +odf2 <- overwater_df |> + mutate(X = X * 1000, Y = Y * 1000) |> + st_as_sf(coords = c('X', 'Y'), crs = st_crs(llin)) + +compare_df <- st_join(llin_ow, odf2, suffix = c("_new", "_old")) |> + mutate(area_old = units::set_units(area_old, km^2)) |> + mutate(area_diff = area_new - area_old) |> + units::drop_units() + +# There are a few big differences, but without out knowing exactly how the first +# file was calculated it's unclear if we can know why. +hist(compare_df$area_diff) + +# See if anything pops out from a visual inspection +p1 <- +compare_df |> + arrange(-abs(area_diff)) |> + sf::st_cast("MULTIPOLYGON") |> +ggplot() + + geom_sf(data = small_bc_ins) + + geom_sf(aes(fill = area_diff)) + + geom_sf(data = odf2) + + geom_sf(data = pacea::bc_coast |> # lower resolution than BC Boundary shapefile (pacea uses the rnaturalearth::ne_coastline(scale = 10)) + st_transform(ll_crs) |> + st_crop(llin_bbox), fill = NA, colour = "blue") + + viridis::scale_fill_viridis() +p1 + +#plotly::ggplotly(p1) # easier to zoom in as needed + +# HBLL OUT ---- +llout <- df2 |> filter(survey_abbrev %in% c("HBLL OUT N", "HBLL OUT S")) +llout_bbox <- st_bbox(llout) + +small_bc_out <- bc_poly |> + st_transform(ll_crs) |> + st_crop(llout_bbox) + +overlap_out <- st_intersection(small_bc_out, llout) +llout_ow <- st_difference(llout, small_bc_out) %>% + mutate(area = st_area(., units = "km^2")) + +# Most cells are completely in the water, but a few seem to overlap with land +hist(llout_ow$area) + +p2 <- +ggplot(llout) + + geom_sf(fill = "blue") + + geom_sf(data = small_bc_out) + + geom_sf(data = overlap_out, fill = "red") + + geom_sf(data = llout_ow, fill = "black") +# plotly::ggplotly(p2) +# ---- + +# Get area for all survey blocks +# --------------------------------- +llin_ow_simple <- select(llin_ow, names(df2)) +llout_ow_simple <- select(llout_ow, names(df2)) + +# Calculate area on highest resolution polygons +area_df <- df2 |> + filter(survey_abbrev %in% c("SYN WCHG", "SYN HS", "SYN QCS", "SYN WCVI")) |> + bind_rows(llin_ow_simple, llout_ow_simple) %>% + mutate(area = units::set_units(st_area(.), "km^2")) |> # being explicit with units to reduce mistakes + units::drop_units() |> + select(survey_abbrev, block_id, area) |> + st_drop_geometry() + +# Join back with the original grid polygons +survey_blocks <- df2 |> + left_join(area_df) |> + mutate(depth_m = -depth_m) + +# This is an option to save a data object that keeps the coastline-intersected polygon +# survey_blocks <- df2 |> +# filter(survey_abbrev %in% c("SYN WCHG", "SYN HS", "SYN QCS", "SYN WCVI")) |> +# bind_rows(llin_ow_simple, llout_ow_simple) %>% +# mutate(depth_m = -depth_m, +# area = units::set_units(st_area(.), "km^2")) |> # being explicit with units to reduce mistakes +# units::drop_units() +# After calculating area, could make the data object smaller for storage in the package +# Size before getting overwater area: 528K data/survey_blocks.rda +# Not sure how small to make this, pretty wonky at dTolerance = 500. +# # survey_blocks <- st_simplify(survey_blocks, preserveTopology = TRUE, dTolerance = 200) # option to make it smaller +# Look at difference in resolution compared to coastline +# p <- +# ggplot() + +# geom_sf(data = small_bc_ins) + +# geom_sf(data = survey_blocks |> +# filter(stringr::str_detect(survey_abbrev, "HBLL INS")) |> +# sf::st_cast("MULTIPOLYGON"), # needed for using plotly +# fill = "blue") #+ +# plotly::ggplotly(p) + +# This is still ending up as a bigger object than I was expecting just by adding area? +utils:::format.object_size(file.size("data/survey_blocks.rda"), units = "Mb", digits = 2) + +attr(survey_blocks, "date-downloaded") <- download_date attr(survey_blocks, "date-generated") <- Sys.Date() # stringi::stri_escape_unicode(st_crs(survey_blocks)$wkt) diff --git a/data/survey_blocks.rda b/data/survey_blocks.rda index 869675d..509317a 100644 Binary files a/data/survey_blocks.rda and b/data/survey_blocks.rda differ diff --git a/man/survey_blocks.Rd b/man/survey_blocks.Rd index bdb5ecb..004cb05 100644 --- a/man/survey_blocks.Rd +++ b/man/survey_blocks.Rd @@ -5,7 +5,17 @@ \alias{survey_blocks} \title{Active survey blocks} \format{ -An object of class \code{sf} (inherits from \code{data.frame}) with 22026 rows and 6 columns. +Simple feature (\code{sf}) collection with 66293 features and 6 fields: +\describe{ +\item{survey_abbrev}{Survey abbreviation.} +\item{survey_series_id}{Unique identifier for the survey series.} +\item{station_key}{Unique identifier for each grid cell} +\item{depth_m}{Depth in metres.} +\item{active_block}{Is block actively fished as of date downloaded: +e.g., \code{attr(gfdata::survey_blocks, "date-downloaded")})} +\item{geometry}{Represents grid cell.} +\item{area}{Overwater area in km^2.} +} } \usage{ survey_blocks @@ -15,4 +25,17 @@ Active survey blocks for DFO Pacific groundfish surveys. Obtained via \code{gfdata::get_active_survey_blocks()} with some cleaning as documented in \code{data-raw/survey_blocks.R}. } +\examples{ +\dontshow{if (requireNamespace("sf", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +library(sf) +library(ggplot2) +gfdata::survey_blocks |> + ggplot(aes(colour = survey_abbrev)) + + geom_sf() + + theme_minimal() + + scale_colour_brewer(palette = "Dark2") +attr(gfdata::survey_blocks, "date-generated") +attr(gfdata::survey_blocks, "date-downloaded") +\dontshow{\}) # examplesIf} +} \keyword{datasets}