Skip to content

Commit

Permalink
initial draft
Browse files Browse the repository at this point in the history
  • Loading branch information
seniort committed Jul 5, 2024
1 parent 1268019 commit d368102
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 14 deletions.
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Authors@R: c(
family = "Stoilova",
role = c("aut"),
email = "jana.stoilova@roche.com"),
person(given = "Tamara",
family = "Senior",
role = c("aut"),
email = "tamara.senior@roche.com"),
person("GlaxoSmithKline LLC", role = c("cph", "fnd")),
person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")),
person("Atorus Research LLC", role = c("cph", "fnd"))
Expand All @@ -40,7 +44,7 @@ Suggests:
haven,
covr,
safetyData,
admiral.test,
pharmaversesdtm,
spelling
Config/testthat/edition: 3
URL: https://github.com/pharmaverse/metatools,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(build_from_derived)
export(build_qnam)
export(check_ct_col)
export(check_ct_data)
export(check_unique_keys)
export(check_variables)
export(combine_supp)
export(convert_var_to_fct)
Expand All @@ -23,6 +24,7 @@ export(set_variable_labels)
export(sort_by_key)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,add_count)
importFrom(dplyr,all_of)
importFrom(dplyr,anti_join)
importFrom(dplyr,any_of)
Expand All @@ -46,6 +48,7 @@ importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(magrittr,"%>%")
importFrom(metacore,get_control_term)
importFrom(metacore,get_keys)
importFrom(metacore,select_dataset)
importFrom(purrr,discard)
importFrom(purrr,keep)
Expand All @@ -62,6 +65,7 @@ importFrom(rlang,":=")
importFrom(rlang,as_label)
importFrom(rlang,as_name)
importFrom(rlang,enexpr)
importFrom(rlang,expr)
importFrom(rlang,is_named)
importFrom(rlang,list2)
importFrom(rlang,set_names)
Expand Down
6 changes: 2 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
* Breaking change: `combine_supp()` requires that the QNAM columns are not in the source dataset (#64)
* `combine_supp()` now allows multiple `QNAM` values to go to the same `IDVAR` (#63)
* Allow supp data to be zero-row with `combine_supp()` (#45)
* Enhance `check_ct_data()` so that `na_acceptable` can now accept a vector of variables. Also add new argument `omit_vars` to control if any variables should be skipped when doing controlled terminology checks (#57).
* Enhance `check_ct_data()` so that `na_acceptable` can now accept a vector of variables. Also add new argument `omit_vars` to control if any variables should be skipped when doing controlled terminology checks (#57)
* Add `check_unique_keys()` to check the uniqueness of records in the dataset by variable keys (#52)

# metatools 0.1.4
* correct bug with `combine_supp()` when the data and the supp have white space. Now it will be trimmed before attempting to merge
Expand All @@ -20,8 +21,5 @@
* add error to `combine_supp()` to report when not all the rows in the supp have merged
* add `floating_pt_correction` argument to `combine_supp()` used for when there are floating point errors with `IDVARVAL`


# metatools 0.1.1

* Based on tester feedback, remove any row in supplemental qualifiers that are empty and rearranged the columns

59 changes: 56 additions & 3 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ check_ct_col <- function(data, metacore, var, na_acceptable = NULL) {
#' acceptability of missing values is based on if the core for the variable is
#' "Required" in the `metacore` object. If set to `TRUE` then will pass check
#' if values are in the control terminology or are missing. If set to
#' `FALSE`then NA will not be acceptable.e
#' `FALSE` then NA will not be acceptable.
#'
#' @return vector
#' @importFrom metacore get_control_term
Expand Down Expand Up @@ -237,8 +237,8 @@ check_vars_in_data <- function(vars, vars_name, data) {
#'
#' This function checks the variables in the dataset against the variables
#' defined in the metacore specifications. If everything matches the function
#' will return `TRUE` and a message starting everything is as expected. If there
#' are additional or missing variables and error will explain the discrepancies
#' will print a message stating everything is as expected. If there
#' are additional or missing variables an error will explain the discrepancies
#' @param data Dataset to check
#' @param metacore metacore object that only contains the specifications for the
#' dataset of interest.
Expand Down Expand Up @@ -290,3 +290,56 @@ check_variables <- function(data, metacore, dataset_name = NULL) {
}
data
}

#' Check Uniqueness of Records by Key
#'
#' This function checks the uniqueness of records in the dataset by key using
#' `get_keys` from the metacore package. If the key uniquely identifies each
#' record the function will print a message stating everything is as expected.
#' If records are not uniquely identified an error will explain the duplicates.
#' @param data Dataset to check
#' @param metacore metacore object that only contains the specifications for the
#' dataset of interest.
#' @param dataset_name Optional string to specify the dataset. This is only
#' needed if the metacore object provided hasn't already been subsetted.
#'
#' @return message if the key uniquely identifies each dataset record, and error otherwise
#' @export
#' @importFrom metacore get_keys
#' @importFrom dplyr pull add_count
#' @importFrom rlang expr
#'
#' @examples
#' library(haven)
#' library(metacore)
#' library(magrittr)
#' load(metacore_example("pilot_ADaM.rda"))
#' spec <- metacore %>% select_dataset("ADSL")
#' data <- read_xpt(metatools_example("adsl.xpt"))
#' check_unique_keys(data, spec)
check_unique_keys <- function(data, metacore, dataset_name = NULL) {
metacore <- make_lone_dataset(metacore, dataset_name)
keys <- get_keys(metacore,expr(!!metacore$ds_spec$dataset))
var_list <- keys %>%
pull(variable)
missing <- var_list %>%
discard(~ . %in% names(data))
if (length(missing) > 0) {
stop(paste0(
"The following variable keys are missing in the dataset:\n",
paste0(missing, collapse = "\n")
))
}
grouped <- data %>%
group_by(pick(!!keys$variable)) %>%
add_count() %>%
filter(n != 1)
if (nrow(grouped) == 0) {
message("Keys uniquely identify records")
} else {
stop(paste0("Keys do not uniquely identify records\n",
"variable keys:\n",
paste0(var_list, collapse = "\n")))
}
data
}
35 changes: 35 additions & 0 deletions man/check_unique_keys.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/check_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_bad_ct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions tests/testthat/test-checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,21 @@ test_that("variable_check works correctly", {
data_mis_ex <- data_extra %>% select(-1)
expect_error(check_variables(data_mis_ex, spec))
})

test_that("check_unique_keys works as expected", {
#check requirement for subsetted metacore object or a dataset name
expect_error(check_unique_keys(data, metacore))
#check missing variable keys error
expect_error(check_unique_keys(data, metacore, dataset_name = "ADVS"))
#check works correctly when records are unique
expect_message(check_unique_keys(data, metacore, dataset_name = "ADSL"))
#check works correctly when records are not unique
test <- build_from_derived(metacore,
dataset_name = "ADLBHY",
ds_list = list("LB" = safetyData::sdtm_lb,
"ADSL" = safetyData::adam_adsl,
"ADLBC" = safetyData::adam_adlbc),
predecessor_only = FALSE,
keep = FALSE)
expect_error(check_unique_keys(test, metacore, dataset_name = "ADLBHY"))
})
6 changes: 3 additions & 3 deletions tests/testthat/test-supp.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,14 +182,14 @@ test_that("combine_supp", {

test_that("combine_supp works with different IDVARVAL classes", {
expect_equal(
combine_supp(admiral.test::admiral_ae, admiral.test::admiral_suppae) %>%
combine_supp(pharmaversesdtm::ae, pharmaversesdtm::suppae) %>%
pull(AESEQ),
admiral.test::admiral_ae %>% pull(AESEQ)
pharmaversesdtm::ae %>% pull(AESEQ)
)
})

test_that("combine_supp works with without QEVAL", {
expect_silent(combine_supp(admiral.test::admiral_tr, admiral.test::admiral_supptr))
expect_silent(combine_supp(pharmaversesdtm::tr_onco, pharmaversesdtm::supptr_onco))
})

test_that("supp data that does not match the main data will raise a warning", {
Expand Down

0 comments on commit d368102

Please sign in to comment.