From d974d3a9b0f6e178bf0e1053814aba6e99202900 Mon Sep 17 00:00:00 2001 From: ecophilina Date: Tue, 15 Oct 2024 17:50:02 -0700 Subject: [PATCH] clean up documentation and create pkgdown site --- .Rbuildignore | 3 ++ .github/workflows/pkgdown.yaml | 50 ++++++++++++++++++++++ .gitignore | 3 +- DESCRIPTION | 2 +- R/correct-ssids.R | 12 +++--- R/parent-level-counts.R | 12 +++--- R/skate-level-counts.R | 11 +++-- _pkgdown.yml | 4 ++ inst/CITATION | 18 ++++---- man/correct_ssids.Rd | 17 -------- man/get_parent_level_counts.Rd | 18 -------- man/get_skate_level_counts.Rd | 16 ------- man/gfdata-package.Rd | 1 + vignettes/01-gfdata-vignette.Rmd | 12 +++--- vignettes/02-gfdata-vignette-get-all.Rmd | 10 +++-- vignettes/gfdata-vignette-get-all.Rmd.orig | 10 ++++- vignettes/gfdata-vignette.Rmd.orig | 44 +++++++++++-------- 17 files changed, 134 insertions(+), 109 deletions(-) create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 _pkgdown.yml delete mode 100644 man/correct_ssids.Rd delete mode 100644 man/get_parent_level_counts.Rd delete mode 100644 man/get_skate_level_counts.Rd diff --git a/.Rbuildignore b/.Rbuildignore index 56d7ba0..28d4818 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -6,3 +6,6 @@ ^data-raw$ ^vignettes/get-all-cache ^vignettes/knitr-cache +^_pkgdown\.yml$ +^docs$ +^pkgdown$ diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..4bbce75 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.gitignore b/.gitignore index 74135b5..8014643 100644 --- a/.gitignore +++ b/.gitignore @@ -53,4 +53,5 @@ inst/shortraker-kni* figs/* inst/pcod-vb.html *~ -*.dll \ No newline at end of file +*.dll +docs diff --git a/DESCRIPTION b/DESCRIPTION index 1615a1e..6598d3a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,5 +35,5 @@ Suggests: sf VignetteBuilder: knitr Roxygen: list(markdown = TRUE) -URL: https://github.com/pbs-assess/gfdata +URL: https://github.com/pbs-assess/gfdata, https://pbs-assess.github.io/gfdata/ BugReports: https://github.com/pbs-assess/gfdata/issues diff --git a/R/correct-ssids.R b/R/correct-ssids.R index f538419..06e85a2 100644 --- a/R/correct-ssids.R +++ b/R/correct-ssids.R @@ -1,9 +1,9 @@ -#' Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid -#' -#' @param dat df containing these columns: fishing_event_ids, survey_series_id, 
survey_id, -#' major_stat_area_code, minor_stat_area_code -#' @param specimens Defaults to FALSE where checks for duplication of fishing_event_ids -#' +# Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid +# +# @param dat df containing these columns: fishing_event_ids, survey_series_id, survey_id, +# major_stat_area_code, minor_stat_area_code +# @param specimens Defaults to FALSE where checks for duplication of fishing_event_ids +# correct_ssids <- function(dat, specimens = FALSE) { try(dat[dat$survey_series_id %in% c(6, 7), ]$survey_id <- NA, silent = TRUE) try(dat[((dat$survey_series_id == 6 & dat$major_stat_area_code %in% c("03", "04"))), ]$survey_series_id <- 7, silent = TRUE) diff --git a/R/parent-level-counts.R b/R/parent-level-counts.R index 1effe54..41a8005 100644 --- a/R/parent-level-counts.R +++ b/R/parent-level-counts.R @@ -1,9 +1,9 @@ -#' summarize sub/minor level counts from fishing event data at the parent event level -#' includes correction for a typo in dataframe -#' retrieves missing fishing_event_ids for sablefish surveys using major_level_ids -#' -#' @param fe df retrieved with get-event-data.sql -#' +# summarize sub/minor level counts from fishing event data at the parent event level +# includes correction for a typo in dataframe +# retrieves missing fishing_event_ids for sablefish surveys using major_level_ids +# +# @param fe df retrieved with get-event-data.sql +# get_parent_level_counts <- function(fe) { # just actual parent-level events fe_A_no_parent <- filter(fe, is.na(FE_PARENT_EVENT_ID), is.na(FE_MINOR_LEVEL_ID), is.na(FE_SUB_LEVEL_ID)) diff --git a/R/skate-level-counts.R b/R/skate-level-counts.R index edfc9c4..8d25411 100644 --- a/R/skate-level-counts.R +++ b/R/skate-level-counts.R @@ -1,9 +1,8 @@ -#' summarize fishing event data at the skate level -#' retrieves missing fishing_event_ids for sablefish surveys using major_level_ids -#' -#' @param fe df retrieved with get-event-data.sql -#' - +# 
summarize fishing event data at the skate level +# retrieves missing fishing_event_ids for sablefish surveys using major_level_ids +# +# @param fe df retrieved with get-event-data.sql +# get_skate_level_counts <- function(fe) { fe <- fe |> distinct() diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..f869e55 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://pbs-assess.github.io/gfdata/ +template: + bootstrap: 5 + diff --git a/inst/CITATION b/inst/CITATION index 4b58c22..3cd22da 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,22 +1,22 @@ citHeader("To cite gfdata in publications use:") -citEntry( - entry = "manual", +bibentry( + bibtype = "manual", title = "{gfdata}: Data Extraction for {DFO} {PBS} Groundfish Stocks", - year = "2022", - note = "R package version 0.0.0.9000", - author = "Keppel, E. A. and Anderson, S.C. and Edwards, A. M. and Grandin, C.", + year = "2024", + note = "R package version 0.1.3", + author = "Keppel, E. A. and Anderson, S.C. and Edwards, A. M. and Grandin, C. and English, P. A.", url = "https://github.com/pbs-assess/gfdata", textVersion = paste( - "Keppel, E.A., S.C. Anderson, A.M. Edwards, and C. Grandin. 2022. + "Keppel, E.A., S.C. Anderson, A.M. Edwards, C. Grandin, and P.A. English. 2024. gfdata: Data Extraction for DFO PBS Groundfish Stocks. R package version - 0.0.0.9000. https://github.com/pbs-assess/gfdata" + 0.1.3. https://github.com/pbs-assess/gfdata" ) ) -citEntry( - entry = "article", +bibentry( + bibtype = "article", title = "A Reproducible Data Synopsis for over 100 Species of {British Columbia} Groundfish", author = "Anderson, S.C. and Keppel, E. A. and Edwards, A. 
M.", year = "2019", diff --git a/man/correct_ssids.Rd b/man/correct_ssids.Rd deleted file mode 100644 index eebfd55..0000000 --- a/man/correct_ssids.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/correct-ssids.R -\name{correct_ssids} -\alias{correct_ssids} -\title{Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid} -\usage{ -correct_ssids(dat, specimens = FALSE) -} -\arguments{ -\item{dat}{df containing these columns: fishing_event_ids, survey_series_id, survey_id, -major_stat_area_code, minor_stat_area_code} - -\item{specimens}{Defaults to FALSE where checks for duplication of fishing_event_ids} -} -\description{ -Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid -} diff --git a/man/get_parent_level_counts.Rd b/man/get_parent_level_counts.Rd deleted file mode 100644 index 43e97a9..0000000 --- a/man/get_parent_level_counts.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parent-level-counts.R -\name{get_parent_level_counts} -\alias{get_parent_level_counts} -\title{summarize sub/minor level counts from fishing event data at the parent event level -includes correction for a typo in dataframe -retrieves missing fishing_event_ids for sablefish surveys using major_level_ids} -\usage{ -get_parent_level_counts(fe) -} -\arguments{ -\item{fe}{df retrieved with get-event-data.sql} -} -\description{ -summarize sub/minor level counts from fishing event data at the parent event level -includes correction for a typo in dataframe -retrieves missing fishing_event_ids for sablefish surveys using major_level_ids -} diff --git a/man/get_skate_level_counts.Rd b/man/get_skate_level_counts.Rd deleted file mode 100644 index 7f0a886..0000000 --- a/man/get_skate_level_counts.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit 
documentation in R/skate-level-counts.R -\name{get_skate_level_counts} -\alias{get_skate_level_counts} -\title{summarize fishing event data at the skate level -retrieves missing fishing_event_ids for sablefish surveys using major_level_ids} -\usage{ -get_skate_level_counts(fe) -} -\arguments{ -\item{fe}{df retrieved with get-event-data.sql} -} -\description{ -summarize fishing event data at the skate level -retrieves missing fishing_event_ids for sablefish surveys using major_level_ids -} diff --git a/man/gfdata-package.Rd b/man/gfdata-package.Rd index 2d5ca8e..805d8b3 100644 --- a/man/gfdata-package.Rd +++ b/man/gfdata-package.Rd @@ -12,6 +12,7 @@ Facilitates groundfish data extraction at the Canadian Department of Fisheries a Useful links: \itemize{ \item \url{https://github.com/pbs-assess/gfdata} + \item \url{https://pbs-assess.github.io/gfdata/} \item Report bugs at \url{https://github.com/pbs-assess/gfdata/issues} } diff --git a/vignettes/01-gfdata-vignette.Rmd b/vignettes/01-gfdata-vignette.Rmd index d3cd8f9..6cd23f5 100644 --- a/vignettes/01-gfdata-vignette.Rmd +++ b/vignettes/01-gfdata-vignette.Rmd @@ -1,17 +1,19 @@ --- -title: "gfdata Vignette" +title: "Introduction to gfdata" author: "Elise Keppel" date: "2024-10-15" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{gfdata Vignette} + %\VignetteIndexEntry{Introduction to gfdata} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- - - - + # Setup diff --git a/vignettes/02-gfdata-vignette-get-all.Rmd b/vignettes/02-gfdata-vignette-get-all.Rmd index ffb046c..c2797f6 100644 --- a/vignettes/02-gfdata-vignette-get-all.Rmd +++ b/vignettes/02-gfdata-vignette-get-all.Rmd @@ -1,15 +1,19 @@ --- -title: "gfdata `get_all` vignette" +title: "Using 'get_all' functions" author: "Philina English" date: "2024-10-11" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{gfdata `get_all' Vignette} + %\VignetteIndexEntry{Using 'get_all' functions} %\VignetteEngine{knitr::rmarkdown} 
%\VignetteEncoding{UTF-8} --- - + ## Why use a `get_all_*()` function? diff --git a/vignettes/gfdata-vignette-get-all.Rmd.orig b/vignettes/gfdata-vignette-get-all.Rmd.orig index ccb0469..790bdf3 100644 --- a/vignettes/gfdata-vignette-get-all.Rmd.orig +++ b/vignettes/gfdata-vignette-get-all.Rmd.orig @@ -1,14 +1,20 @@ --- -title: "gfdata `get_all` vignette" +title: "Using 'get_all' functions" author: "Philina English" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{gfdata `get_all' Vignette} + %\VignetteIndexEntry{Using 'get_all' functions} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- + + ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = FALSE, diff --git a/vignettes/gfdata-vignette.Rmd.orig b/vignettes/gfdata-vignette.Rmd.orig index aeafceb..97034cd 100644 --- a/vignettes/gfdata-vignette.Rmd.orig +++ b/vignettes/gfdata-vignette.Rmd.orig @@ -1,14 +1,20 @@ --- -title: "gfdata Vignette" +title: "Introduction to gfdata" author: "Elise Keppel" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{gfdata Vignette} + %\VignetteIndexEntry{Introduction to gfdata} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- + + ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = FALSE, @@ -34,7 +40,7 @@ If you don't already have the package installed, then run: # install.packages("devtools") devtools::install_github("pbs-assess/gfdata") ``` - + First we will load the package along with dplyr since we will use it within our code later. ```{r, cache=FALSE, warning = FALSE, message = FALSE} @@ -47,11 +53,11 @@ library(dplyr) Commercial and research catch, effort, and biological data for groundfish are archived by the DFO Pacific Groundfish Data Unit (Fisheries and Oceans Canada, Science -Branch, Pacific Region) and housed in a number of relational databases archived -on-site at the Pacific Biological Station, Nanaimo, BC). 
+Branch, Pacific Region) and housed in a number of relational databases archived +on-site at the Pacific Biological Station, Nanaimo, BC. -The gfdata package was -develeoped to automate data extraction from these databases in a consistent, +The gfdata package was +developed to automate data extraction from these databases in a consistent, reproducible manner with a series of `get_*()` functions. The functions extract data using SQL queries, developed with support from the Groundfish Data Unit. The standardized datasets are designed to feed directly into functions in the @@ -78,9 +84,9 @@ sort(fns[grepl("get", fns)]) ``` -The `get_*()` functions extract data by species, and some -functions have arguments for additional filtering, such as survey series, -management area, years, gear type, or environmental data type. In all cases, +The `get_*()` functions extract data by species, and some +functions have arguments for additional filtering, such as survey series, +management area, years, gear type, or environmental data type. In all cases, the `get_*()` functions can extract data for one or multiple species. All functions can be viewed with the available arguments in the help @@ -93,14 +99,14 @@ documentation for each set of functions with: ``` -In addition, a number of the `get` functions retain many relevant database +In addition, a number of the `get` functions retain many relevant database columns that users can filter on with, for example, `dplyr::filter(dat, x = "y")`. # Example As an example, we could extract Pacific cod survey sample data with the -following function call if we were on a DFO laptop, with appropriate database +following function call if we were on a DFO laptop, with appropriate database permissions, and on the PBS network. 
```{r, eval=.eval} @@ -108,8 +114,8 @@ dat <- get_survey_samples("pacific cod") head(dat) ``` -Note that there are some duplicate records in the databases due to relating a -record to multiple stratification schemes for alternative analyses. If this +Note that there are some duplicate records in the databases due to relating a +record to multiple stratification schemes for alternative analyses. If this occurs, a warning is given. > "Duplicate specimen IDs are present because of overlapping survey stratifications. If working with the data yourelf, filter them after selecting specific surveys. For example, `dat <- dat[!duplicated(dat$specimen_id), ]`. The tidying and plotting functions within gfplot will do this for you." @@ -133,7 +139,7 @@ get_survey_samples(c(396, 222)) get_survey_samples(c(222, "pacific cod")) ``` -We can further restrict the data extraction to a single trawl survey series +We can further restrict the data extraction to a single trawl survey series by including the ssid (survey series id) argument. For a list of survey series id codes, run the lookup function `get_ssids()`. @@ -160,16 +166,16 @@ glimpse(dat) # Caching the data from the SQL servers -In addition to the individual `get_*()` functions, there is a function -`cache_pbs_data()` that runs all the `get_*()` functions and caches the data +In addition to the individual `get_*()` functions, there is a function +`cache_pbs_data()` that runs all the `get_*()` functions and caches the data in a folder that you specify. This is useful to be able to have the data available for working on later when not on the PBS network, and it saves -running the SQL queries (though the data do get updated occassionally and the +running the SQL queries (though the data do get updated occasionally and the most up-to-date data should usually be extracted for analysis). 
The helper function `cache_pbs_data()` will extract all of the data for the given species into a series of `.rds` files into whatever folder you specify to -the `path` argument. I'll wrap it in a quick check just to make sure we don't +the `path` argument. I'll wrap it in a quick check just to make sure we don't download the data twice if we build this document again. ```{r, eval = FALSE, eval=.eval}