Move dead_band from metadata match to flux computation
Also return min and max time values, along with mean, when computing flux
bpbond committed Feb 24, 2024
1 parent 119c967 commit 25d2681
Showing 8 changed files with 66 additions and 71 deletions.
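In practice, the commit shifts where the dead band is handled: `wtf_metadata_match()` no longer takes a `dead_bands` argument, and `wtf_compute_fluxes()` gains a `dead_band` one. A minimal sketch of the new calling convention, mirroring the vignette changes below (it assumes the `dat` and `metadat` objects from the vignette and its 10-second dead band):

```r
library(whattheflux)

# Match concentration data to metadata; obs_lengths now covers the whole
# chamber deployment, i.e. the intended measurement period plus any dead band
dat$metadat_row <- wtf_metadata_match(
  data_timestamps = dat$TIMESTAMP,
  start_dates = metadat$Date,
  start_times = metadat$Start_time,
  obs_lengths = metadat$Obs_length + 10) # 10 s expected dead band

# Exclude the dead band when fitting fluxes, not when matching
fluxes <- wtf_compute_fluxes(dat,
                             group_column = "Plot",
                             time_column = "TIMESTAMP",
                             conc_column = "CO2",
                             dead_band = 10)
```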
19 changes: 7 additions & 12 deletions R/metadata.R
@@ -6,8 +6,9 @@
#' @param data_timestamps Data timestamps, either character (YYYY-MM-DD HH:MM:SS) or \code{\link{POSIXct}}
#' @param start_dates Metadata measurement date entries, either character (YYYY-MM-DD) or \code{\link{POSIXct}}
#' @param start_times Metadata measurement start time entries, either character (HH:MM:SS) or \code{\link[lubridate]{period}}
#' @param dead_bands Dead band lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same
#' length as \code{start_dates}. This should include both the intended
#' measurement period and any dead band time at the beginning
#' @importFrom lubridate ymd_hms ymd hms tz is.POSIXct is.period
#' @importFrom utils head tail
#' @importFrom stats na.omit
@@ -24,25 +25,23 @@
#' # Metadata start dates and times: two measurements, starting 5 minutes apart
#' s_d <- c("2024-01-01", "2024-01-01")
#' s_t <- c("13:00:00", "13:05:00")
#' db <- c(1, 1) # Dead bands
#' ol <- c(60, 60) # Observation lengths
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' # Returns {1, 1, 2, NA} indicating that the first and second data timestamps
#' # correspond to metadata entry 1, the third to entry 2, and the fourth
#' # has no match
#'
#' # This generates an error because of overlapping timestamps:
#' \dontrun{
#' s_t <- c("13:00:00", "13:01:00")
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' }
wtf_metadata_match <- function(data_timestamps,
start_dates, start_times,
dead_bands, obs_lengths) {
obs_lengths) {

# Input checks and convert to dates/timestamps if needed
stopifnot(length(start_dates) == length(start_times))
stopifnot(length(start_dates) == length(dead_bands))
stopifnot(length(start_dates) == length(obs_lengths))

# The metadata dates and times shouldn't be empty
@@ -52,9 +51,6 @@ wtf_metadata_match <- function(data_timestamps,
if(any(is.na(start_times))) {
warning("One or more metadata times are missing")
}
if(any(is.na(dead_bands))) {
warning("One or more dead bands are missing")
}
if(any(is.na(obs_lengths))) {
warning("One or more observation lengths are missing")
}
@@ -70,11 +66,10 @@
if(is.character(start_times)) start_times <- hms(start_times)
stopifnot(is.period(start_times))

stopifnot(is.numeric(dead_bands))
stopifnot(is.numeric(obs_lengths))

# Compute the metadata start and stop timestamps
start_timestamps <- start_dates + start_times + dead_bands
start_timestamps <- start_dates + start_times
stopifnot(is.POSIXct((start_timestamps))) # should always be true!
stop_timestamps <- start_timestamps + obs_lengths

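Because the `dead_bands` argument is gone, any dead band now has to be folded into `obs_lengths`, as the updated documentation above notes. A small sketch with plain vectors (the timestamps echo the package tests; the 10 s dead band is an assumption matching the vignette):

```r
library(whattheflux)

d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
         "2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
ol <- c(60, 60)   # intended measurement lengths, seconds
db <- 10          # assumed dead band, seconds

# Each metadata window now spans start time to start time + 70 s
wtf_metadata_match(d_t, s_d, s_t, obs_lengths = ol + db)
# Expected: 1 1 2 NA, as in the roxygen example above
```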
6 changes: 6 additions & 0 deletions R/models.R
@@ -96,6 +96,8 @@ wtf_normalize_time <- function(time, normalize = TRUE) {
#' @param conc_column Name of the gas concentration column in \code{data}, character
#' @param volume XXX
#' @param area XXX
#' @param dead_band Length of dead band, the equilibration period at the
#' beginning of the time series during which data are ignored, in seconds (numeric)
#' @param normalize_time Normalize the values so that first is zero? Logical
#' @param fit_function Optional flux-fit function;
#' default is \code{\link{wtf_fit_models}}
@@ -117,6 +119,7 @@ wtf_compute_fluxes <- function(data,
conc_column,
volume,
area,
dead_band = 0,
normalize_time = TRUE,
fit_function = wtf_fit_models,
...) {
@@ -136,8 +139,11 @@
# passing volume and area?
f <- function(x, ...) {
x$.norm_time <- wtf_normalize_time(x[,time_column], normalize_time)
x <- x[x$.norm_time >= dead_band,] # exclude dead band data
out <- fit_function(x$.norm_time, x[,conc_column], ...)
out[time_column] <- mean(x[,time_column])
out[paste0(time_column, "_min")] <- min(x[,time_column])
out[paste0(time_column, "_max")] <- max(x[,time_column])
return(out)
}

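Inside `wtf_compute_fluxes()`, the dead band is applied to normalized time: rows with `.norm_time` below `dead_band` are dropped before fitting, and the new `_min`/`_max` time columns then report the window actually used for the fit. A toy illustration of that filter (not package code; assumes one observation per second):

```r
# Sixty one-second observations for a single measurement
norm_time <- 0:59
dead_band <- 10

kept <- norm_time >= dead_band   # the same test used in wtf_compute_fluxes()
sum(kept)                        # 50 points remain for the regression
range(norm_time[kept])           # 10 59 -> the fit window after the dead band
```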
14 changes: 7 additions & 7 deletions inst/extdata/TG10-01087-metadata.csv
@@ -1,7 +1,7 @@
Date,Start_time,Plot,Volume,Area,Dead_band,Obs_length
2022-10-27,10:35:30,A,,,10,60
2022-10-27,10:37:00,B,,,10,60
2022-10-27,10:39:00,C,,,10,60
2022-10-27,10:40:30,D,,,10,60
2022-10-27,10:42:00,E,,,10,60
2022-10-27,10:43:30,F,,,10,60
Date,Start_time,Plot,Volume,Area,Obs_length
2022-10-27,10:35:30,A,,,60
2022-10-27,10:37:15,B,,,60
2022-10-27,10:39:00,C,,,60
2022-10-27,10:40:30,D,,,60
2022-10-27,10:42:00,E,,,60
2022-10-27,10:43:30,F,,,60
4 changes: 4 additions & 0 deletions man/wtf_compute_fluxes.Rd

(Generated documentation file; diff not rendered.)

19 changes: 6 additions & 13 deletions man/wtf_metadata_match.Rd

(Generated documentation file; diff not rendered.)

47 changes: 15 additions & 32 deletions tests/testthat/test-metadata.R
@@ -7,76 +7,59 @@ test_that("wtf_metadata_match works", {
# Handles bad input - length and types
expect_error(wtf_metadata_match(d_t, start_dates = 1, start_times = 1:2),
regexp = "length\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1:2),
regexp = "length\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1, obs_lengths = 1:2),
expect_error(wtf_metadata_match(d_t, 1, 1, obs_lengths = 1:2),
regexp = "length\\(obs_lengths\\)")
expect_error(wtf_metadata_match(1:2, 1, 1, 1, 1),
expect_error(wtf_metadata_match(1:2, 1, 1, 1),
regexp = "is.POSIXct\\(data_timestamps\\)")
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1, 1),
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1),
regexp = "is.POSIXct\\(start_dates\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1, 1),
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1),
regexp = "is.period\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", dead_bands = "1", 1),
regexp = "is.numeric\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", 1, obs_lengths = "1"),
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", obs_lengths = "1"),
regexp = "is.numeric\\(obs_lengths\\)")

# Warn on missing data
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
suppressMessages({
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2),
regexp = "dates are missing")
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2),
regexp = "times are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(1, NA), 2:3),
regexp = "dead bands are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, 1:2, c(NA, 3)),
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(NA, 2)),
regexp = "observation lengths are missing")
})

# Overlapping metadata
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
expect_silent(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(600, 600)),
expect_silent(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(600, 600)),
regexp = "overlaps")
# Reports which entries are problematic
s_d <- c("2024-01-01", "2024-01-01", "2024-01-01")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:04:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 3")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:02:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2, 3")

# Assigns matches correctly
d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
"2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
db <- c(1, 1)
ol <- c(60, 60)
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, db, ol))
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, ol))
expect_identical(x, c(1, 1, 2, NA_real_))

# Warns on missing metadata dates
suppressMessages({
s_d[1] <- NA
expect_warning(wtf_metadata_match(d_t, s_d, s_t, db, ol),
regexp = "dates are missing")
})
# We also warn on missing times, but checking that requires a nested
# expect_warning here, because of hms() behavior, which crashes
# on GitHub Actions

# Gives a message if there are unmatched metadata entries
s_d <- c("2024-01-01", "2024-01-01", "2024-01-10")
s_t <- c("13:00:00", "13:05:00", "13:10:00")
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(1, 1, 1), c(60, 60, 60)),
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(60, 60, 60)),
regexp = "no timestamp matches: 3")
})
6 changes: 6 additions & 0 deletions tests/testthat/test-models.R
@@ -36,17 +36,23 @@ test_that("wtf_compute_fluxes works", {
expect_s3_class(out, "data.frame")
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Raw times
out <- wtf_compute_fluxes(x, "Plot", "time", "conc", 1, 1,
fit_function = ff, normalize_time = FALSE)
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Passing NULL for the group column should return a single row
out <- wtf_compute_fluxes(x, NULL, "time", "conc", 1, 1,
fit_function = ff, normalize_time = TRUE)
expect_s3_class(out, "data.frame")
expect_identical(nrow(out), 1L) # one row
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times
})
22 changes: 15 additions & 7 deletions vignettes/intro-to-whattheflux.Rmd
@@ -69,9 +69,11 @@ using the `TIMESTAMP` column that `wtf_read_LI7810` helpfully created
when it read the data file.

```{r matching, fig.width=8}
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10) # 10 is expected dead band length
# Based on the row match information, add a "Plot" column to the data
dat$Plot <- metadat$Plot[dat$metadat_row]
@@ -98,9 +100,11 @@ Here, we'll just change the values programmatically and re-match:

```{r matching2, fig.width=8}
metadat$Obs_length[3:5] <- c(30, 45, 45)
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10)
dat$Plot <- metadat$Plot[dat$metadat_row]
p %+% dat
@@ -118,7 +122,8 @@ QA/QC information.
fluxes <- wtf_compute_fluxes(dat,
group_column = "Plot",
time_column = "TIMESTAMP",
conc_column = "CO2")
conc_column = "CO2",
dead_band = 10)
# By default, wtf_compute_fluxes returns a data.frame with one row per
# grouping variable value (i.e., per measurement). The first column is the
@@ -130,6 +135,9 @@ fluxes <- wtf_compute_fluxes(dat,
# For clarity, print out only a subset of the columns
fluxes[c(1:2, 4, 14, 15, 20)]
# The data frame also has columns TIMESTAMP_min and TIMESTAMP_max,
# giving the entire period over which the flux was computed.
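# Width of each fitted window: roughly Obs_length seconds, since the 10 s
# dead band at the start was excluded (illustrative check; assumes TIMESTAMP
# is POSIXct, as created by wtf_read_LI7810)
fluxes$TIMESTAMP_max - fluxes$TIMESTAMP_min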
ggplot(fluxes, aes(Plot, slope_estimate, color = adj.r.squared)) +
geom_point() +
geom_linerange(aes(ymin = slope_estimate - slope_std.error,