diff --git a/.Rbuildignore b/.Rbuildignore
index 56d7ba0..28d4818 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,3 +6,6 @@
^data-raw$
^vignettes/get-all-cache
^vignettes/knitr-cache
+^_pkgdown\.yml$
+^docs$
+^pkgdown$
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
new file mode 100644
index 0000000..4bbce75
--- /dev/null
+++ b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,50 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ pull_request:
+ branches: [main, master]
+ release:
+ types: [published]
+ workflow_dispatch:
+
+name: pkgdown.yaml
+
+permissions: read-all
+
+jobs:
+ pkgdown:
+ runs-on: ubuntu-latest
+ # Only restrict concurrency for non-PR jobs
+ concurrency:
+ group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ permissions:
+ contents: write
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: r-lib/actions/setup-pandoc@v2
+
+ - uses: r-lib/actions/setup-r@v2
+ with:
+ use-public-rspm: true
+
+ - uses: r-lib/actions/setup-r-dependencies@v2
+ with:
+ extra-packages: any::pkgdown, local::.
+ needs: website
+
+ - name: Build site
+ run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+ shell: Rscript {0}
+
+ - name: Deploy to GitHub pages 🚀
+ if: github.event_name != 'pull_request'
+ uses: JamesIves/github-pages-deploy-action@v4.5.0
+ with:
+ clean: false
+ branch: gh-pages
+ folder: docs
diff --git a/.gitignore b/.gitignore
index 74135b5..8014643 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,4 +53,5 @@ inst/shortraker-kni*
figs/*
inst/pcod-vb.html
*~
-*.dll
\ No newline at end of file
+*.dll
+docs
diff --git a/DESCRIPTION b/DESCRIPTION
index 1615a1e..6598d3a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,5 +35,5 @@ Suggests:
sf
VignetteBuilder: knitr
Roxygen: list(markdown = TRUE)
-URL: https://github.com/pbs-assess/gfdata
+URL: https://github.com/pbs-assess/gfdata, https://pbs-assess.github.io/gfdata/
BugReports: https://github.com/pbs-assess/gfdata/issues
diff --git a/R/correct-ssids.R b/R/correct-ssids.R
index f538419..06e85a2 100644
--- a/R/correct-ssids.R
+++ b/R/correct-ssids.R
@@ -1,9 +1,9 @@
-#' Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid
-#'
-#' @param dat df containing these columns: fishing_event_ids, survey_series_id, survey_id,
-#' major_stat_area_code, minor_stat_area_code
-#' @param specimens Defaults to FALSE where checks for duplication of fishing_event_ids
-#'
+# Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid
+#
+# @param dat df containing these columns: fishing_event_ids, survey_series_id, survey_id,
+# major_stat_area_code, minor_stat_area_code
+# @param specimens Defaults to FALSE where checks for duplication of fishing_event_ids
+#
correct_ssids <- function(dat, specimens = FALSE) {
try(dat[dat$survey_series_id %in% c(6, 7), ]$survey_id <- NA, silent = TRUE)
try(dat[((dat$survey_series_id == 6 & dat$major_stat_area_code %in% c("03", "04"))), ]$survey_series_id <- 7, silent = TRUE)
diff --git a/R/parent-level-counts.R b/R/parent-level-counts.R
index 1effe54..41a8005 100644
--- a/R/parent-level-counts.R
+++ b/R/parent-level-counts.R
@@ -1,9 +1,9 @@
-#' summarize sub/minor level counts from fishing event data at the parent event level
-#' includes correction for a typo in dataframe
-#' retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
-#'
-#' @param fe df retrieved with get-event-data.sql
-#'
+# summarize sub/minor level counts from fishing event data at the parent event level
+# includes correction for a typo in dataframe
+# retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
+#
+# @param fe df retrieved with get-event-data.sql
+#
get_parent_level_counts <- function(fe) {
# just actual parent-level events
fe_A_no_parent <- filter(fe, is.na(FE_PARENT_EVENT_ID), is.na(FE_MINOR_LEVEL_ID), is.na(FE_SUB_LEVEL_ID))
diff --git a/R/skate-level-counts.R b/R/skate-level-counts.R
index edfc9c4..8d25411 100644
--- a/R/skate-level-counts.R
+++ b/R/skate-level-counts.R
@@ -1,9 +1,8 @@
-#' summarize fishing event data at the skate level
-#' retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
-#'
-#' @param fe df retrieved with get-event-data.sql
-#'
-
+# summarize fishing event data at the skate level
+# retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
+#
+# @param fe df retrieved with get-event-data.sql
+#
get_skate_level_counts <- function(fe) {
fe <- fe |> distinct()
diff --git a/_pkgdown.yml b/_pkgdown.yml
new file mode 100644
index 0000000..f869e55
--- /dev/null
+++ b/_pkgdown.yml
@@ -0,0 +1,4 @@
+url: https://pbs-assess.github.io/gfdata/
+template:
+ bootstrap: 5
+
diff --git a/inst/CITATION b/inst/CITATION
index 4b58c22..3cd22da 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -1,22 +1,22 @@
citHeader("To cite gfdata in publications use:")
-citEntry(
- entry = "manual",
+bibentry(
+ bibtype = "manual",
title = "{gfdata}: Data Extraction for {DFO} {PBS} Groundfish Stocks",
- year = "2022",
- note = "R package version 0.0.0.9000",
- author = "Keppel, E. A. and Anderson, S.C. and Edwards, A. M. and Grandin, C.",
+ year = "2024",
+ note = "R package version 0.1.3",
+ author = "Keppel, E. A. and Anderson, S.C. and Edwards, A. M. and Grandin, C. and English, P. A.",
url = "https://github.com/pbs-assess/gfdata",
textVersion = paste(
- "Keppel, E.A., S.C. Anderson, A.M. Edwards, and C. Grandin. 2022.
+ "Keppel, E.A., S.C. Anderson, A.M. Edwards, C. Grandin, and P.A. English. 2024.
gfdata: Data Extraction for DFO PBS Groundfish Stocks. R package version
- 0.0.0.9000. https://github.com/pbs-assess/gfdata"
+ 0.1.3. https://github.com/pbs-assess/gfdata"
)
)
-citEntry(
- entry = "article",
+bibentry(
+ bibtype = "article",
title = "A Reproducible Data Synopsis for over 100 Species of {British Columbia} Groundfish",
author = "Anderson, S.C. and Keppel, E. A. and Edwards, A. M.",
year = "2019",
diff --git a/man/correct_ssids.Rd b/man/correct_ssids.Rd
deleted file mode 100644
index eebfd55..0000000
--- a/man/correct_ssids.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/correct-ssids.R
-\name{correct_ssids}
-\alias{correct_ssids}
-\title{Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid}
-\usage{
-correct_ssids(dat, specimens = FALSE)
-}
-\arguments{
-\item{dat}{df containing these columns: fishing_event_ids, survey_series_id, survey_id,
-major_stat_area_code, minor_stat_area_code}
-
-\item{specimens}{Defaults to FALSE where checks for duplication of fishing_event_ids}
-}
-\description{
-Custom fixes for problem surveys with shared trip ids resulting in assignment to wrong ssid
-}
diff --git a/man/get_parent_level_counts.Rd b/man/get_parent_level_counts.Rd
deleted file mode 100644
index 43e97a9..0000000
--- a/man/get_parent_level_counts.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/parent-level-counts.R
-\name{get_parent_level_counts}
-\alias{get_parent_level_counts}
-\title{summarize sub/minor level counts from fishing event data at the parent event level
-includes correction for a typo in dataframe
-retrieves missing fishing_event_ids for sablefish surveys using major_level_ids}
-\usage{
-get_parent_level_counts(fe)
-}
-\arguments{
-\item{fe}{df retrieved with get-event-data.sql}
-}
-\description{
-summarize sub/minor level counts from fishing event data at the parent event level
-includes correction for a typo in dataframe
-retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
-}
diff --git a/man/get_skate_level_counts.Rd b/man/get_skate_level_counts.Rd
deleted file mode 100644
index 7f0a886..0000000
--- a/man/get_skate_level_counts.Rd
+++ /dev/null
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/skate-level-counts.R
-\name{get_skate_level_counts}
-\alias{get_skate_level_counts}
-\title{summarize fishing event data at the skate level
-retrieves missing fishing_event_ids for sablefish surveys using major_level_ids}
-\usage{
-get_skate_level_counts(fe)
-}
-\arguments{
-\item{fe}{df retrieved with get-event-data.sql}
-}
-\description{
-summarize fishing event data at the skate level
-retrieves missing fishing_event_ids for sablefish surveys using major_level_ids
-}
diff --git a/man/gfdata-package.Rd b/man/gfdata-package.Rd
index 2d5ca8e..805d8b3 100644
--- a/man/gfdata-package.Rd
+++ b/man/gfdata-package.Rd
@@ -12,6 +12,7 @@ Facilitates groundfish data extraction at the Canadian Department of Fisheries a
Useful links:
\itemize{
\item \url{https://github.com/pbs-assess/gfdata}
+ \item \url{https://pbs-assess.github.io/gfdata/}
\item Report bugs at \url{https://github.com/pbs-assess/gfdata/issues}
}
diff --git a/vignettes/01-gfdata-vignette.Rmd b/vignettes/01-gfdata-vignette.Rmd
index d3cd8f9..6cd23f5 100644
--- a/vignettes/01-gfdata-vignette.Rmd
+++ b/vignettes/01-gfdata-vignette.Rmd
@@ -1,17 +1,19 @@
---
-title: "gfdata Vignette"
+title: "Introduction to gfdata"
author: "Elise Keppel"
date: "2024-10-15"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{gfdata Vignette}
+ %\VignetteIndexEntry{Introduction to gfdata}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
-
-
-
+
# Setup
diff --git a/vignettes/02-gfdata-vignette-get-all.Rmd b/vignettes/02-gfdata-vignette-get-all.Rmd
index ffb046c..c2797f6 100644
--- a/vignettes/02-gfdata-vignette-get-all.Rmd
+++ b/vignettes/02-gfdata-vignette-get-all.Rmd
@@ -1,15 +1,19 @@
---
-title: "gfdata `get_all` vignette"
+title: "Using 'get_all' functions"
author: "Philina English"
date: "2024-10-11"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{gfdata `get_all' Vignette}
+ %\VignetteIndexEntry{Using 'get_all' functions}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
-
+
## Why use a `get_all_*()` function?
diff --git a/vignettes/gfdata-vignette-get-all.Rmd.orig b/vignettes/gfdata-vignette-get-all.Rmd.orig
index ccb0469..790bdf3 100644
--- a/vignettes/gfdata-vignette-get-all.Rmd.orig
+++ b/vignettes/gfdata-vignette-get-all.Rmd.orig
@@ -1,14 +1,20 @@
---
-title: "gfdata `get_all` vignette"
+title: "Using 'get_all' functions"
author: "Philina English"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{gfdata `get_all' Vignette}
+ %\VignetteIndexEntry{Using 'get_all' functions}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
+
+
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = FALSE,
diff --git a/vignettes/gfdata-vignette.Rmd.orig b/vignettes/gfdata-vignette.Rmd.orig
index aeafceb..97034cd 100644
--- a/vignettes/gfdata-vignette.Rmd.orig
+++ b/vignettes/gfdata-vignette.Rmd.orig
@@ -1,14 +1,20 @@
---
-title: "gfdata Vignette"
+title: "Introduction to gfdata"
author: "Elise Keppel"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
- %\VignetteIndexEntry{gfdata Vignette}
+ %\VignetteIndexEntry{Introduction to gfdata}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
+
+
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = FALSE,
@@ -34,7 +40,7 @@ If you don't already have the package installed, then run:
# install.packages("devtools")
devtools::install_github("pbs-assess/gfdata")
```
-
+
First we will load the package along with dplyr since we will use it within our code later.
```{r, cache=FALSE, warning = FALSE, message = FALSE}
@@ -47,11 +53,11 @@ library(dplyr)
Commercial and research catch, effort, and biological data for groundfish are
archived by the DFO Pacific Groundfish Data Unit (Fisheries and Oceans Canada, Science
-Branch, Pacific Region) and housed in a number of relational databases archived
-on-site at the Pacific Biological Station, Nanaimo, BC).
+Branch, Pacific Region) and housed in a number of relational databases archived
+on-site at the Pacific Biological Station, Nanaimo, BC).
-The gfdata package was
-develeoped to automate data extraction from these databases in a consistent,
+The gfdata package was
+developed to automate data extraction from these databases in a consistent,
reproducible manner with a series of `get_*()` functions. The functions extract
data using SQL queries, developed with support from the Groundfish Data Unit.
The standardized datasets are designed to feed directly into functions in the
@@ -78,9 +84,9 @@ sort(fns[grepl("get", fns)])
```
-The `get_*()` functions extract data by species, and some
-functions have arguments for additional filtering, such as survey series,
-management area, years, gear type, or environmental data type. In all cases,
+The `get_*()` functions extract data by species, and some
+functions have arguments for additional filtering, such as survey series,
+management area, years, gear type, or environmental data type. In all cases,
the `get_*()` functions can extract data for one or multiple species.
All functions can be viewed with the available arguments in the help
@@ -93,14 +99,14 @@ documentation for each set of functions with:
```
-In addition, a number of the `get` functions retain many relevant database
+In addition, a number of the `get` functions retain many relevant database
columns that users can filter on with, for example, `dplyr::filter(dat, x = "y")`.
# Example
As an example, we could extract Pacific cod survey sample data with the
-following function call if we were on a DFO laptop, with appropriate database
+following function call if we were on a DFO laptop, with appropriate database
permissions, and on the PBS network.
```{r, eval=.eval}
@@ -108,8 +114,8 @@ dat <- get_survey_samples("pacific cod")
head(dat)
```
-Note that there are some duplicate records in the databases due to relating a
-record to multiple stratification schemes for alternative analyses. If this
+Note that there are some duplicate records in the databases due to relating a
+record to multiple stratification schemes for alternative analyses. If this
occurs, a warning is given.
> "Duplicate specimen IDs are present because of overlapping survey stratifications. If working with the data yourelf, filter them after selecting specific surveys. For example, `dat <- dat[!duplicated(dat$specimen_id), ]`. The tidying and plotting functions within gfplot will do this for you."
@@ -133,7 +139,7 @@ get_survey_samples(c(396, 222))
get_survey_samples(c(222, "pacific cod"))
```
-We can further restrict the data extraction to a single trawl survey series
+We can further restrict the data extraction to a single trawl survey series
by including the ssid (survey series id) argument. For a list of
survey series id codes, run the lookup function `get_ssids()`.
@@ -160,16 +166,16 @@ glimpse(dat)
# Caching the data from the SQL servers
-In addition to the individual `get_*()` functions, there is a function
-`cache_pbs_data()` that runs all the `get_*()` functions and caches the data
+In addition to the individual `get_*()` functions, there is a function
+`cache_pbs_data()` that runs all the `get_*()` functions and caches the data
in a folder that you specify. This is useful to be able to have the data
available for working on later when not on the PBS network, and it saves
-running the SQL queries (though the data do get updated occassionally and the
+running the SQL queries (though the data do get updated occasionally and the
most up-to-date data should usually be extracted for analysis).
The helper function `cache_pbs_data()` will extract all of the data for the
given species into a series of `.rds` files into whatever folder you specify to
-the `path` argument. I'll wrap it in a quick check just to make sure we don't
+the `path` argument. I'll wrap it in a quick check just to make sure we don't
download the data twice if we build this document again.
```{r, eval = FALSE, eval=.eval}