Move dead_band from metadata match to flux computation (#28)
Also return min and max time values, along with mean, when computing flux
bpbond authored Feb 24, 2024
1 parent 119c967 commit 0e61515
Showing 8 changed files with 66 additions and 71 deletions.
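For quick reference, here is a minimal sketch of the revised `wtf_metadata_match()` interface, reusing the timestamps from the function's documentation example and tests below; the `wtf_compute_fluxes()` half of the change is sketched further down. This is illustrative only, not code from the commit itself.

```r
library(whattheflux)

# Four data timestamps and two metadata entries (from the function's example)
d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
         "2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")

# dead_bands is no longer an argument; fold any dead band into obs_lengths
# so that matching covers the full chamber deployment
wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(60, 60))
#> c(1, 1, 2, NA)  -- metadata row matched to each data timestamp

# The dead band itself is now handled by wtf_compute_fluxes(dead_band = ...),
# whose output gains <time_column>_min and <time_column>_max columns
# alongside the mean; see the vignette diff below for the full call.
```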
19 changes: 7 additions & 12 deletions R/metadata.R
@@ -6,8 +6,9 @@
#' @param data_timestamps Data timestamps, either character (YYYY-MM-DD HH:MM:SS) or \code{\link{POSIXct}}
#' @param start_dates Metadata measurement date entries, either character (YYYY-MM-DD) or \code{\link{POSIXct}}
#' @param start_times Metadata measurement start time entries, either character (HH:MM:SS) or \code{\link[lubridate]{period}}
#' @param dead_bands Dead band lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same
#' length as \code{start_dates}. This should include both the intended
#' measurement period and any dead band time at the beginning
#' @importFrom lubridate ymd_hms ymd hms tz is.POSIXct is.period
#' @importFrom utils head tail
#' @importFrom stats na.omit
@@ -24,25 +25,23 @@
#' # Metadata start dates and times: two measurements, starting 5 minutes apart
#' s_d <- c("2024-01-01", "2024-01-01")
#' s_t <- c("13:00:00", "13:05:00")
#' db <- c(1, 1) # Dead bands
#' ol <- c(60, 60) # Observation lengths
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' # Returns {1, 1, 2, NA} indicating that the first and second data timestamps
#' # correspond to metadata entry 1, the third to entry 2, and the fourth
#' # has no match
#'
#' # This generates an error because of overlapping timestamps:
#' \dontrun{
#' s_t <- c("13:00:00", "13:01:00")
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' }
wtf_metadata_match <- function(data_timestamps,
start_dates, start_times,
dead_bands, obs_lengths) {
obs_lengths) {

# Input checks and convert to dates/timestamps if needed
stopifnot(length(start_dates) == length(start_times))
stopifnot(length(start_dates) == length(dead_bands))
stopifnot(length(start_dates) == length(obs_lengths))

# The metadata dates and times shouldn't be empty
@@ -52,9 +51,6 @@ wtf_metadata_match <- function(data_timestamps,
if(any(is.na(start_times))) {
warning("One or more metadata times are missing")
}
if(any(is.na(dead_bands))) {
warning("One or more dead bands are missing")
}
if(any(is.na(obs_lengths))) {
warning("One or more observation lengths are missing")
}
@@ -70,11 +66,10 @@
if(is.character(start_times)) start_times <- hms(start_times)
stopifnot(is.period(start_times))

stopifnot(is.numeric(dead_bands))
stopifnot(is.numeric(obs_lengths))

# Compute the metadata start and stop timestamps
start_timestamps <- start_dates + start_times + dead_bands
start_timestamps <- start_dates + start_times
stopifnot(is.POSIXct((start_timestamps))) # should always be true!
stop_timestamps <- start_timestamps + obs_lengths

6 changes: 6 additions & 0 deletions R/models.R
@@ -96,6 +96,8 @@ wtf_normalize_time <- function(time, normalize = TRUE) {
#' @param conc_column Name of the gas concentration column in \code{data}, character
#' @param volume XXX
#' @param area XXX
#' @param dead_band Length of dead band, the equilibration period at the
#' beginning of the time series during which data are ignored, in seconds (numeric)
#' @param normalize_time Normalize the values so that first is zero? Logical
#' @param fit_function Optional flux-fit function;
#' default is \code{\link{wtf_fit_models}}
@@ -117,6 +119,7 @@ wtf_compute_fluxes <- function(data,
conc_column,
volume,
area,
dead_band = 0,
normalize_time = TRUE,
fit_function = wtf_fit_models,
...) {
@@ -136,8 +139,11 @@
# passing volume and area?
f <- function(x, ...) {
x$.norm_time <- wtf_normalize_time(x[,time_column], normalize_time)
x <- x[x$.norm_time >= dead_band,] # exclude dead band data
out <- fit_function(x$.norm_time, x[,conc_column], ...)
out[time_column] <- mean(x[,time_column])
out[paste0(time_column, "_min")] <- min(x[,time_column])
out[paste0(time_column, "_max")] <- max(x[,time_column])
return(out)
}

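A self-contained toy illustration of the behavior added here, using only base R: the first `dead_band` seconds are dropped before fitting, and the minimum, mean, and maximum of the remaining times are what the new output columns report. This is a sketch of the idea, not the package's internal code.

```r
# Toy illustration of dead_band filtering before a linear flux fit
set.seed(42)
dead_band <- 10                                     # seconds to discard
x <- data.frame(time = 0:59)                        # normalized time, seconds
x$conc <- 400 + 0.5 * x$time + rnorm(60, sd = 0.5)  # synthetic concentrations

kept <- x[x$time >= dead_band, ]                    # exclude equilibration data
fit <- lm(conc ~ time, data = kept)                 # simple linear flux fit

coef(fit)[["time"]]   # slope estimate (concentration change per second)
mean(kept$time)       # reported under the time column
range(kept$time)      # reported as <time_column>_min and <time_column>_max
```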
14 changes: 7 additions & 7 deletions inst/extdata/TG10-01087-metadata.csv
@@ -1,7 +1,7 @@
Date,Start_time,Plot,Volume,Area,Dead_band,Obs_length
2022-10-27,10:35:30,A,,,10,60
2022-10-27,10:37:00,B,,,10,60
2022-10-27,10:39:00,C,,,10,60
2022-10-27,10:40:30,D,,,10,60
2022-10-27,10:42:00,E,,,10,60
2022-10-27,10:43:30,F,,,10,60
Date,Start_time,Plot,Volume,Area,Obs_length
2022-10-27,10:35:30,A,,,60
2022-10-27,10:37:15,B,,,60
2022-10-27,10:39:00,C,,,60
2022-10-27,10:40:30,D,,,60
2022-10-27,10:42:00,E,,,60
2022-10-27,10:43:30,F,,,60
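For metadata files that still carry the old Dead_band column, one possible migration at call time is sketched below. This is a hedged sketch: the fallback value of 10 seconds follows the vignette's assumption, and the package is assumed to install as whattheflux (per the vignette name).

```r
# Migration sketch for metadata that still has a Dead_band column
metadat <- read.csv(system.file("extdata", "TG10-01087-metadata.csv",
                                package = "whattheflux"))
# Use the column if present; otherwise assume a 10 s dead band, as in the vignette
dead_band <- if ("Dead_band" %in% names(metadat)) metadat$Dead_band else 10
obs_lengths <- metadat$Obs_length + dead_band  # full deployment length, seconds
# then: wtf_metadata_match(dat$TIMESTAMP, metadat$Date, metadat$Start_time, obs_lengths)
```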
4 changes: 4 additions & 0 deletions man/wtf_compute_fluxes.Rd

19 changes: 6 additions & 13 deletions man/wtf_metadata_match.Rd

47 changes: 15 additions & 32 deletions tests/testthat/test-metadata.R
@@ -7,76 +7,59 @@ test_that("wtf_metadata_match works", {
# Handles bad input - length and types
expect_error(wtf_metadata_match(d_t, start_dates = 1, start_times = 1:2),
regexp = "length\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1:2),
regexp = "length\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1, obs_lengths = 1:2),
expect_error(wtf_metadata_match(d_t, 1, 1, obs_lengths = 1:2),
regexp = "length\\(obs_lengths\\)")
expect_error(wtf_metadata_match(1:2, 1, 1, 1, 1),
expect_error(wtf_metadata_match(1:2, 1, 1, 1),
regexp = "is.POSIXct\\(data_timestamps\\)")
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1, 1),
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1),
regexp = "is.POSIXct\\(start_dates\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1, 1),
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1),
regexp = "is.period\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", dead_bands = "1", 1),
regexp = "is.numeric\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", 1, obs_lengths = "1"),
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", obs_lengths = "1"),
regexp = "is.numeric\\(obs_lengths\\)")

# Warn on missing data
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
suppressMessages({
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2),
regexp = "dates are missing")
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2),
regexp = "times are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(1, NA), 2:3),
regexp = "dead bands are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, 1:2, c(NA, 3)),
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(NA, 2)),
regexp = "observation lengths are missing")
})

# Overlapping metadata
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
expect_silent(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(600, 600)),
expect_silent(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(600, 600)),
regexp = "overlaps")
# Reports which entries are problematic
s_d <- c("2024-01-01", "2024-01-01", "2024-01-01")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:04:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 3")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:02:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2, 3")

# Assigns matches correctly
d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
"2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
db <- c(1, 1)
ol <- c(60, 60)
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, db, ol))
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, ol))
expect_identical(x, c(1, 1, 2, NA_real_))

# Warns on missing metadata dates
suppressMessages({
s_d[1] <- NA
expect_warning(wtf_metadata_match(d_t, s_d, s_t, db, ol),
regexp = "dates are missing")
})
# We also warn on missing times, but checking that requires a nested
# expect_warning here, because of hms() behavior, which crashes
# on GitHub Actions

# Gives a message if there are unmatched metadata entries
s_d <- c("2024-01-01", "2024-01-01", "2024-01-10")
s_t <- c("13:00:00", "13:05:00", "13:10:00")
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(1, 1, 1), c(60, 60, 60)),
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(60, 60, 60)),
regexp = "no timestamp matches: 3")
})
6 changes: 6 additions & 0 deletions tests/testthat/test-models.R
@@ -36,17 +36,23 @@ test_that("wtf_compute_fluxes works", {
expect_s3_class(out, "data.frame")
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Raw times
out <- wtf_compute_fluxes(x, "Plot", "time", "conc", 1, 1,
fit_function = ff, normalize_time = FALSE)
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Passing NULL for the group column should return a single row
out <- wtf_compute_fluxes(x, NULL, "time", "conc", 1, 1,
fit_function = ff, normalize_time = TRUE)
expect_s3_class(out, "data.frame")
expect_identical(nrow(out), 1L) # one row
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times
})
22 changes: 15 additions & 7 deletions vignettes/intro-to-whattheflux.Rmd
@@ -69,9 +69,11 @@ using the `TIMESTAMP` column that `wtf_read_LI7810` helpfully created
when it read the data file.

```{r matching, fig.width=8}
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10) # 10 is expected dead band length
# Based on the row match information, add a "Plot" column to the data
dat$Plot <- metadat$Plot[dat$metadat_row]
@@ -98,9 +100,11 @@ Here, we'll just change the values programmatically and re-match:

```{r matching2, fig.width=8}
metadat$Obs_length[3:5] <- c(30, 45, 45)
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10)
dat$Plot <- metadat$Plot[dat$metadat_row]
p %+% dat
@@ -118,7 +122,8 @@ QA/QC information.
fluxes <- wtf_compute_fluxes(dat,
group_column = "Plot",
time_column = "TIMESTAMP",
conc_column = "CO2")
conc_column = "CO2",
dead_band = 10)
# By default, wtf_compute_fluxes returns a data.frame with one row per
# grouping variable value (i.e., per measurement). The first column is the
@@ -130,6 +135,9 @@ fluxes <- wtf_compute_fluxes(dat,
# For clarity, print out only a subset of the columns
fluxes[c(1:2, 4, 14, 15, 20)]
# The data frame also has columns TIMESTAMP_min and TIMESTAMP_max,
# giving the entire period over which the flux was computed.
ggplot(fluxes, aes(Plot, slope_estimate, color = adj.r.squared)) +
geom_point() +
geom_linerange(aes(ymin = slope_estimate - slope_std.error,
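One way to use the new `TIMESTAMP_min`/`TIMESTAMP_max` columns is to check the length of each fitted window after dead band removal. In the sketch below, `window_s` is a hypothetical helper column added for illustration, not part of the package output; it assumes the `fluxes` data frame from the vignette chunk above.

```r
# Hypothetical check using the new columns (assumes `fluxes` from above)
fluxes$window_s <- as.numeric(difftime(fluxes$TIMESTAMP_max,
                                       fluxes$TIMESTAMP_min,
                                       units = "secs"))
summary(fluxes$window_s)  # fitted-window length per measurement, in seconds
```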
