Move dead_band from metadata match to flux computation (#28)
Also return min and max time values, along with mean, when computing flux
bpbond authored Feb 24, 2024
1 parent 119c967 commit 0e61515
Showing 8 changed files with 66 additions and 71 deletions.
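For quick reference, here is a minimal sketch of the revised `wtf_metadata_match()` interface, reusing the timestamps from the function's documentation example and tests below; the `wtf_compute_fluxes()` half of the change is sketched further down. This is illustrative only, not code from the commit itself.

```r
library(whattheflux)

# Four data timestamps and two metadata entries (from the function's example)
d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
         "2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")

# dead_bands is no longer an argument; fold any dead band into obs_lengths
# so that matching covers the full chamber deployment
wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(60, 60))
#> c(1, 1, 2, NA)  -- metadata row matched to each data timestamp

# The dead band itself is now handled by wtf_compute_fluxes(dead_band = ...),
# whose output gains <time_column>_min and <time_column>_max columns
# alongside the mean; see the vignette diff below for the full call.
```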
19 changes: 7 additions & 12 deletions R/metadata.R
@@ -6,8 +6,9 @@
#' @param data_timestamps Data timestamps, either character (YYYY-MM-DD HH:MM:SS) or \code{\link{POSIXct}}
#' @param start_dates Metadata measurement date entries, either character (YYYY-MM-DD) or \code{\link{POSIXct}}
#' @param start_times Metadata measurement start time entries, either character (HH:MM:SS) or \code{\link[lubridate]{period}}
#' @param dead_bands Dead band lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same length as \code{start_dates}
#' @param obs_lengths Observation lengths in seconds, numeric; must be same
#' length as \code{start_dates}. This should include both the intended
#' measurement period and any dead band time at the beginning
#' @importFrom lubridate ymd_hms ymd hms tz is.POSIXct is.period
#' @importFrom utils head tail
#' @importFrom stats na.omit
@@ -24,25 +25,23 @@
#' # Metadata start dates and times: two measurements, starting 5 minutes apart
#' s_d <- c("2024-01-01", "2024-01-01")
#' s_t <- c("13:00:00", "13:05:00")
#' db <- c(1, 1) # Dead bands
#' ol <- c(60, 60) # Observation lengths
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' # Returns {1, 1, 2, NA} indicating that the first and second data timestamps
#' # correspond to metadata entry 1, the third to entry 2, and the fourth
#' # has no match
#'
#' # This generates an error because of overlapping timestamps:
#' \dontrun{
#' s_t <- c("13:00:00", "13:01:00")
#' wtf_metadata_match(d_t, s_d, s_t, db, ol)
#' wtf_metadata_match(d_t, s_d, s_t, ol)
#' }
wtf_metadata_match <- function(data_timestamps,
start_dates, start_times,
dead_bands, obs_lengths) {
obs_lengths) {

# Input checks and convert to dates/timestamps if needed
stopifnot(length(start_dates) == length(start_times))
stopifnot(length(start_dates) == length(dead_bands))
stopifnot(length(start_dates) == length(obs_lengths))

# The metadata dates and times shouldn't be empty
@@ -52,9 +51,6 @@ wtf_metadata_match <- function(data_timestamps,
if(any(is.na(start_times))) {
warning("One or more metadata times are missing")
}
if(any(is.na(dead_bands))) {
warning("One or more dead bands are missing")
}
if(any(is.na(obs_lengths))) {
warning("One or more observation lengths are missing")
}
@@ -70,11 +66,10 @@
if(is.character(start_times)) start_times <- hms(start_times)
stopifnot(is.period(start_times))

stopifnot(is.numeric(dead_bands))
stopifnot(is.numeric(obs_lengths))

# Compute the metadata start and stop timestamps
start_timestamps <- start_dates + start_times + dead_bands
start_timestamps <- start_dates + start_times
stopifnot(is.POSIXct((start_timestamps))) # should always be true!
stop_timestamps <- start_timestamps + obs_lengths

6 changes: 6 additions & 0 deletions R/models.R
@@ -96,6 +96,8 @@ wtf_normalize_time <- function(time, normalize = TRUE) {
#' @param conc_column Name of the gas concentration column in \code{data}, character
#' @param volume XXX
#' @param area XXX
#' @param dead_band Length of dead band, the equilibration period at the
#' beginning of the time series during which data are ignored, in seconds (numeric)
#' @param normalize_time Normalize the values so that first is zero? Logical
#' @param fit_function Optional flux-fit function;
#' default is \code{\link{wtf_fit_models}}
@@ -117,6 +119,7 @@ wtf_compute_fluxes <- function(data,
conc_column,
volume,
area,
dead_band = 0,
normalize_time = TRUE,
fit_function = wtf_fit_models,
...) {
@@ -136,8 +139,11 @@
# passing volume and area?
f <- function(x, ...) {
x$.norm_time <- wtf_normalize_time(x[,time_column], normalize_time)
x <- x[x$.norm_time >= dead_band,] # exclude dead band data
out <- fit_function(x$.norm_time, x[,conc_column], ...)
out[time_column] <- mean(x[,time_column])
out[paste0(time_column, "_min")] <- min(x[,time_column])
out[paste0(time_column, "_max")] <- max(x[,time_column])
return(out)
}

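A self-contained toy illustration of the behavior added here, using only base R: the first `dead_band` seconds are dropped before fitting, and the minimum, mean, and maximum of the remaining times are what the new output columns report. This is a sketch of the idea, not the package's internal code.

```r
# Toy illustration of dead_band filtering before a linear flux fit
set.seed(42)
dead_band <- 10                                     # seconds to discard
x <- data.frame(time = 0:59)                        # normalized time, seconds
x$conc <- 400 + 0.5 * x$time + rnorm(60, sd = 0.5)  # synthetic concentrations

kept <- x[x$time >= dead_band, ]                    # exclude equilibration data
fit <- lm(conc ~ time, data = kept)                 # simple linear flux fit

coef(fit)[["time"]]   # slope estimate (concentration change per second)
mean(kept$time)       # reported under the time column
range(kept$time)      # reported as <time_column>_min and <time_column>_max
```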
14 changes: 7 additions & 7 deletions inst/extdata/TG10-01087-metadata.csv
@@ -1,7 +1,7 @@
Date,Start_time,Plot,Volume,Area,Dead_band,Obs_length
2022-10-27,10:35:30,A,,,10,60
2022-10-27,10:37:00,B,,,10,60
2022-10-27,10:39:00,C,,,10,60
2022-10-27,10:40:30,D,,,10,60
2022-10-27,10:42:00,E,,,10,60
2022-10-27,10:43:30,F,,,10,60
Date,Start_time,Plot,Volume,Area,Obs_length
2022-10-27,10:35:30,A,,,60
2022-10-27,10:37:15,B,,,60
2022-10-27,10:39:00,C,,,60
2022-10-27,10:40:30,D,,,60
2022-10-27,10:42:00,E,,,60
2022-10-27,10:43:30,F,,,60
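For metadata files that still carry the old Dead_band column, one possible migration at call time is sketched below. This is a hedged sketch: the fallback value of 10 seconds follows the vignette's assumption, and the package is assumed to install as whattheflux (per the vignette name).

```r
# Migration sketch for metadata that still has a Dead_band column
metadat <- read.csv(system.file("extdata", "TG10-01087-metadata.csv",
                                package = "whattheflux"))
# Use the column if present; otherwise assume a 10 s dead band, as in the vignette
dead_band <- if ("Dead_band" %in% names(metadat)) metadat$Dead_band else 10
obs_lengths <- metadat$Obs_length + dead_band  # full deployment length, seconds
# then: wtf_metadata_match(dat$TIMESTAMP, metadat$Date, metadat$Start_time, obs_lengths)
```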
4 changes: 4 additions & 0 deletions man/wtf_compute_fluxes.Rd

19 changes: 6 additions & 13 deletions man/wtf_metadata_match.Rd

47 changes: 15 additions & 32 deletions tests/testthat/test-metadata.R
@@ -7,76 +7,59 @@ test_that("wtf_metadata_match works", {
# Handles bad input - length and types
expect_error(wtf_metadata_match(d_t, start_dates = 1, start_times = 1:2),
regexp = "length\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1:2),
regexp = "length\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, 1, 1, dead_bands = 1, obs_lengths = 1:2),
expect_error(wtf_metadata_match(d_t, 1, 1, obs_lengths = 1:2),
regexp = "length\\(obs_lengths\\)")
expect_error(wtf_metadata_match(1:2, 1, 1, 1, 1),
expect_error(wtf_metadata_match(1:2, 1, 1, 1),
regexp = "is.POSIXct\\(data_timestamps\\)")
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1, 1),
expect_error(wtf_metadata_match(d_t, 1, "12:34", 1),
regexp = "is.POSIXct\\(start_dates\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1, 1),
expect_error(wtf_metadata_match(d_t, "2023-12-23", 1, 1),
regexp = "is.period\\(start_times\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", dead_bands = "1", 1),
regexp = "is.numeric\\(dead_bands\\)")
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", 1, obs_lengths = "1"),
expect_error(wtf_metadata_match(d_t, "2023-12-23", "12:34:00", obs_lengths = "1"),
regexp = "is.numeric\\(obs_lengths\\)")

# Warn on missing data
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
suppressMessages({
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, c("2024-01-01", NA), s_t, 1:2),
regexp = "dates are missing")
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2, 2:3),
expect_warning(wtf_metadata_match(d_t, s_d, c(NA, "13:05:00"), 1:2),
regexp = "times are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(1, NA), 2:3),
regexp = "dead bands are missing")
expect_warning(wtf_metadata_match(d_t, s_d, s_t, 1:2, c(NA, 3)),
expect_warning(wtf_metadata_match(d_t, s_d, s_t, c(NA, 2)),
regexp = "observation lengths are missing")
})

# Overlapping metadata
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
expect_silent(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, c(1, 1), obs_lengths = c(600, 600)),
expect_silent(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(60, 60)))
expect_error(wtf_metadata_match(d_t, s_d, s_t, obs_lengths = c(600, 600)),
regexp = "overlaps")
# Reports which entries are problematic
s_d <- c("2024-01-01", "2024-01-01", "2024-01-01")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:04:00", "13:05:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 3")
expect_error(wtf_metadata_match(d_t, s_d, c("13:00:00", "13:01:00", "13:02:00"),
c(1, 1, 1), obs_lengths = c(60, 60, 60)),
obs_lengths = c(60, 60, 60)),
regexp = "overlaps: 2, 3")

# Assigns matches correctly
d_t <- c("2024-01-01 13:00:05", "2024-01-01 13:00:10",
"2024-01-01 13:05:05", "2024-01-01 13:10:00")
s_d <- c("2024-01-01", "2024-01-01")
s_t <- c("13:00:00", "13:05:00")
db <- c(1, 1)
ol <- c(60, 60)
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, db, ol))
expect_silent(x <- wtf_metadata_match(d_t, s_d, s_t, ol))
expect_identical(x, c(1, 1, 2, NA_real_))

# Warns on missing metadata dates
suppressMessages({
s_d[1] <- NA
expect_warning(wtf_metadata_match(d_t, s_d, s_t, db, ol),
regexp = "dates are missing")
})
# We also warn on missing times, but checking that requires a nested
# expect_warning here, because of hms() behavior, which crashes
# on GitHub Actions

# Gives a message if there are unmatched metadata entries
s_d <- c("2024-01-01", "2024-01-01", "2024-01-10")
s_t <- c("13:00:00", "13:05:00", "13:10:00")
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(1, 1, 1), c(60, 60, 60)),
expect_message(wtf_metadata_match(d_t, s_d, s_t, c(60, 60, 60)),
regexp = "no timestamp matches: 3")
})
6 changes: 6 additions & 0 deletions tests/testthat/test-models.R
@@ -36,17 +36,23 @@ test_that("wtf_compute_fluxes works", {
expect_s3_class(out, "data.frame")
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Raw times
out <- wtf_compute_fluxes(x, "Plot", "time", "conc", 1, 1,
fit_function = ff, normalize_time = FALSE)
expect_identical(out$Plot, plots) # one row per plot
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times

# Passing NULL for the group column should return a single row
out <- wtf_compute_fluxes(x, NULL, "time", "conc", 1, 1,
fit_function = ff, normalize_time = TRUE)
expect_s3_class(out, "data.frame")
expect_identical(nrow(out), 1L) # one row
expect_identical(out$time, rep(mean(times), nrow(out))) # mean of raw times
expect_identical(out$time_min, rep(min(times), nrow(out))) # min of raw times
expect_identical(out$time_max, rep(max(times), nrow(out))) # max of raw times
})
22 changes: 15 additions & 7 deletions vignettes/intro-to-whattheflux.Rmd
@@ -69,9 +69,11 @@ using the `TIMESTAMP` column that `wtf_read_LI7810` helpfully created
when it read the data file.

```{r matching, fig.width=8}
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10) # 10 is expected dead band length
# Based on the row match information, add a "Plot" column to the data
dat$Plot <- metadat$Plot[dat$metadat_row]
@@ -98,9 +100,11 @@ Here, we'll just change the values programmatically and re-match:

```{r matching2, fig.width=8}
metadat$Obs_length[3:5] <- c(30, 45, 45)
dat$metadat_row <- wtf_metadata_match(dat$TIMESTAMP,
metadat$Date, metadat$Start_time,
metadat$Dead_band, metadat$Obs_length)
dat$metadat_row <- wtf_metadata_match(
data_timestamps = dat$TIMESTAMP,
start_dates = metadat$Date,
start_times = metadat$Start_time,
obs_lengths = metadat$Obs_length + 10)
dat$Plot <- metadat$Plot[dat$metadat_row]
p %+% dat
@@ -118,7 +122,8 @@ QA/QC information.
fluxes <- wtf_compute_fluxes(dat,
group_column = "Plot",
time_column = "TIMESTAMP",
conc_column = "CO2")
conc_column = "CO2",
dead_band = 10)
# By default, wtf_compute_fluxes returns a data.frame with one row per
# grouping variable value (i.e., per measurement). The first column is the
@@ -130,6 +135,9 @@ fluxes <- wtf_compute_fluxes(dat,
# For clarity, print out only a subset of the columns
fluxes[c(1:2, 4, 14, 15, 20)]
# The data frame also has columns TIMESTAMP_min and TIMESTAMP_max,
# giving the entire period over which the flux was computed.
ggplot(fluxes, aes(Plot, slope_estimate, color = adj.r.squared)) +
geom_point() +
geom_linerange(aes(ymin = slope_estimate - slope_std.error,
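One way to use the new `TIMESTAMP_min`/`TIMESTAMP_max` columns is to check the length of each fitted window after dead band removal. In the sketch below, `window_s` is a hypothetical helper column added for illustration, not part of the package output; it assumes the `fluxes` data frame from the vignette chunk above.

```r
# Hypothetical check using the new columns (assumes `fluxes` from above)
fluxes$window_s <- as.numeric(difftime(fluxes$TIMESTAMP_max,
                                       fluxes$TIMESTAMP_min,
                                       units = "secs"))
summary(fluxes$window_s)  # fitted-window length per measurement, in seconds
```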
