Skip to content

Commit

Permalink
Merge pull request #20 from thomaszwagerman/tolerance_implementation
Browse files Browse the repository at this point in the history
Tolerance implementation
  • Loading branch information
thomaszwagerman authored Oct 29, 2024
2 parents e74a76e + 46b9141 commit f653e32
Show file tree
Hide file tree
Showing 11 changed files with 161 additions and 70 deletions.
48 changes: 28 additions & 20 deletions R/catch.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
#'
#' The underlying functionality is handled by `create_object_list()`.
#'
#' @param df_current data.frame, the newest/current version of dataset x.
#' @param df_previous data.frame, the old version of dataset, for example x - t1.
#' @param datetime_variable character, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable.
#' @inheritParams create_object_list
#'
#' @returns A dataframe which contains only rows of `df_current` that have changes from `df_previous`, but without new rows.
#' also returns a waldo object as in `loupe()`.
Expand All @@ -28,30 +26,40 @@
#' df_caught
#'
#' @export
catch <- function(df_current, df_previous, datetime_variable) {
catch <- function(df_current, df_previous, datetime_variable, ...) {
butterfly_object_list <- create_object_list(
df_current,
df_previous,
datetime_variable
datetime_variable,
...
)

# By using an anti join, we keep only the rows which have no match in
# df_previous, i.e. the rows that have changed.
df_rows_changed_from_previous <- suppressMessages(
dplyr::anti_join(
butterfly_object_list$df_current_without_new_row,
df_previous
if (butterfly_object_list$butterfly_status == TRUE) {
cli::cat_bullet(
"There are no differences, so there are no rows to return. Did you specify a tolerance that exceeds number of differences?",
bullet = "info",
col = "orange",
bullet_col = "orange"
)
} else {
# By using an anti join, we keep only the rows which have no match in
# df_previous, i.e. the rows that have changed.
df_rows_changed_from_previous <- suppressMessages(
dplyr::anti_join(
butterfly_object_list$df_current_without_new_row,
df_previous
)
)
)

cli::cat_line()
cli::cat_line()

cli::cat_bullet(
"Only these rows are returned.",
bullet = "info",
col = "orange",
bullet_col = "orange"
)
cli::cat_bullet(
"Only these rows are returned.",
bullet = "info",
col = "orange",
bullet_col = "orange"
)

return(df_rows_changed_from_previous)
return(df_rows_changed_from_previous)
}
}
22 changes: 20 additions & 2 deletions R/create_object_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#' @param df_previous data.frame, the old version of dataset, for example x - t1.
#' @param datetime_variable string, which variable to use as unique ID to join
#' `df_current` and `df_previous`. Usually a "datetime" variable.
#' @param ... Other `waldo::compare()` arguments can be supplied here,
#' such as `tolerance` or `max_diffs`. See `?waldo::compare()` for a full list.
#'
#' @returns A list containing boolean where TRUE indicates no changes to
#' previous data and FALSE indicates unexpected changes, a dataframe of
Expand All @@ -32,8 +34,23 @@
#'
#' butterfly_object_list
#'
#' # You can pass other `waldo::compare()` options such as tolerance here
#' butterfly_object_list <- butterfly::create_object_list(
#' butterflycount$march, # This is your new or current dataset
#' butterflycount$february, # This is the previous version you are comparing it to
#' datetime_variable = "time", # This is the unique ID variable they have in common
#' tolerance = 2
#' )
#'
#' butterfly_object_list
#'
#' @export
create_object_list <- function(df_current, df_previous, datetime_variable) {
create_object_list <- function(
df_current,
df_previous,
datetime_variable,
...
) {
# Check input is as expected
stopifnot("`df_current` must be a data.frame" = is.data.frame(df_current))
stopifnot("`df_previous` must be a data.frame" = is.data.frame(df_previous))
Expand Down Expand Up @@ -70,7 +87,8 @@ create_object_list <- function(df_current, df_previous, datetime_variable) {
# Compare the current data with the previous data, without "new" values
waldo_object <- waldo::compare(
df_current_without_new_row,
df_previous
df_previous,
...
)

# Creating a feedback message depending on the waldo object's output
Expand Down
9 changes: 4 additions & 5 deletions R/loupe.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
#'
#' The underlying functionality is handled by `create_object_list()`.
#'
#' @param df_current data.frame, the newest/current version of dataset x.
#' @param df_previous data.frame, the old version of dataset, for example x - t1.
#' @param datetime_variable string, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable.
#' @inheritParams create_object_list
#'
#' @returns A boolean where TRUE indicates no changes to previous data and FALSE indicates unexpected changes.
#'
Expand All @@ -45,11 +43,12 @@
#' )
#'
#' @export
loupe <- function(df_current, df_previous, datetime_variable) {
loupe <- function(df_current, df_previous, datetime_variable, ...) {
butterfly_object_list <- create_object_list(
df_current,
df_previous,
datetime_variable
datetime_variable,
...
)

return(butterfly_object_list$butterfly_status)
Expand Down
80 changes: 45 additions & 35 deletions R/release.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
#' which contains the new rows (if present) but matched rows which contain
#' changes from previous data will be dropped.
#'
#' @param df_current data.frame, the newest/current version of dataset x.
#' @param df_previous data.frame, the old version of dataset, for example x - t1.
#' @param datetime_variable string, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable.
#' @inheritParams create_object_list
#' @param include_new boolean, should new rows be included? Default is TRUE.
#'
#' @returns A dataframe which contains only rows of `df_current` that have not changed from `df_previous`, and includes new rows.
Expand All @@ -28,52 +26,64 @@
#' df_released
#'
#' @export
release <- function(df_current, df_previous, datetime_variable, include_new = TRUE) {
release <- function(df_current, df_previous, datetime_variable, include_new = TRUE, ...) {
butterfly_object_list <- create_object_list(
df_current,
df_previous,
datetime_variable
datetime_variable,
...
)

# By using an inner join, we drop any row which does not match in
# df_previous.
df_current_without_changed_rows <- suppressMessages(
dplyr::inner_join(
butterfly_object_list$df_current_without_new_row,
df_previous
)
)

# Returning the dataframe with or without new rows added
if (include_new == TRUE) {
# Then we add the new rows back in and return the dataframe as such
df_release <- dplyr::bind_rows(
butterfly_object_list$df_current_new_rows,
df_current_without_changed_rows
)

cli::cat_line()
if (butterfly_object_list$butterfly_status == TRUE){

cli::cat_bullet(
"These will be dropped, but new rows are included.",
"There are no differences, so there are no rows to drop. Did you specify a tolerance that exceeds number of differences?",
bullet = "info",
col = "orange",
bullet_col = "orange"
)

return(df_release)
} else {
# By using an inner join, we drop any row which does not match in
# df_previous.
df_current_without_changed_rows <- suppressMessages(
dplyr::inner_join(
butterfly_object_list$df_current_without_new_row,
df_previous
)
)

} else if (include_new == FALSE) {
cli::cat_line()
# Returning the dataframe with or without new rows added
if (include_new == TRUE) {
# Then we add the new rows back in and return the dataframe as such
df_release <- dplyr::bind_rows(
butterfly_object_list$df_current_new_rows,
df_current_without_changed_rows
)

cli::cat_bullet(
"These will be dropped, along with new rows.",
bullet = "info",
col = "orange",
bullet_col = "orange"
)
cli::cat_line()

cli::cat_bullet(
"These will be dropped, but new rows are included.",
bullet = "info",
col = "orange",
bullet_col = "orange"
)

return(df_release)

} else if (include_new == FALSE) {
cli::cat_line()

cli::cat_bullet(
"These will be dropped, along with new rows.",
bullet = "info",
col = "orange",
bullet_col = "orange"
)

# If new rows are not included, simply return the df without changed rows
return(df_current_without_changed_rows)
# If new rows are not included, simply return the df without changed rows
return(df_current_without_changed_rows)
}
}
}
2 changes: 1 addition & 1 deletion man/butterfly-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/catch.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion man/create_object_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/loupe.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/release.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-create_object_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,17 @@ test_that("comparison object is returned when not equal", {
0
)
})

test_that("passing of additional waldo arguments works as expected", {
  # Extra arguments are forwarded to the comparison; with a tolerance of 2
  # the single change between March and February should be "ignored", so the
  # status is expected to report no differences.
  tolerant_comparison <- create_object_list(
    butterflycount$march,
    butterflycount$february,
    datetime_variable = "time",
    tolerance = 2
  )

  testthat::expect_true(tolerant_comparison$butterfly_status)
})
17 changes: 17 additions & 0 deletions vignettes/butterfly.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,23 @@ butterfly::loupe(

`butterfly` follows the `waldo` philosophy of erring on the side of providing too much information, rather than too little. It will give a detailed feedback message on the status between two objects.

### Additional arguments from `waldo::compare()`

You can pass any further argument that `waldo::compare()` accepts to any butterfly function, for instance to specify the tolerance.

If we add a tolerance of 2 to the previous example, no differences should be returned:

```{r tolerance_example}
butterfly::loupe(
butterflycount$march,
butterflycount$february,
datetime_variable = "time",
tolerance = 2 # <- setting a tolerance of 2
)
```

Call `?waldo::compare()` to see the full list of arguments.

## Extracting unexpected changes: catch()

You might want to return changed rows as a dataframe. For this, `butterfly::catch()` is provided.
Expand Down

0 comments on commit f653e32

Please sign in to comment.