Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Close #57: Enhance check_ct_data #67

Merged
merged 9 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Authors@R: c(
role = c("aut"),
email = "mike.stackhouse@atorusresearch.com",
comment = c(ORCID = "0000-0001-6030-723X")),
person(given = "Jana",
family = "Stoilova",
role = c("aut"),
email = "jana.stoilova@roche.com"),
person("GlaxoSmithKline LLC", role = c("cph", "fnd")),
person("F. Hoffmann-La Roche AG", role = c("cph", "fnd")),
person("Atorus Research LLC", role = c("cph", "fnd"))
Expand All @@ -39,6 +43,7 @@ Suggests:
admiral.test,
spelling
Config/testthat/edition: 3
URL: https://pharmaverse.github.io/metatools/
URL: https://github.com/pharmaverse/metatools,
https://pharmaverse.github.io/metatools/
BugReports: https://github.com/pharmaverse/metatools/issues
Language: en-US
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ importFrom(metacore,select_dataset)
importFrom(purrr,discard)
importFrom(purrr,keep)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_dfr)
importFrom(purrr,map_lgl)
importFrom(purrr,pmap_dfr)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Breaking change: `combine_supp()` requires that the QNAM columns are not in the source dataset (#64)
* `combine_supp()` now allows multiple `QNAM` values to go to the same `IDVAR` (#63)
* Allow supp data to be zero-row with `combine_supp()` (#45)
* Enhance `check_ct_data()` so that `na_acceptable` can now accept a vector of variables. Also add new argument `omit_vars` to control if any variables should be skipped when doing controlled terminology checks (#57).

# metatools 0.1.4
* correct bug with `combine_supp()` when the data and the supp have white space. Now it will be trimmed before attempting to merge
Expand Down
80 changes: 69 additions & 11 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ check_ct_col <- function(data, metacore, var, na_acceptable = NULL) {
#' @importFrom dplyr pull
#' @importFrom stringr str_remove_all
#' @export
#'
#' @examples
#' library(haven)
#' library(metacore)
#' library(magrittr)
#' load(metacore_example("pilot_ADaM.rda"))
#' spec <- metacore %>% select_dataset("ADSL")
#' data <- read_xpt(metatools_example("adsl.xpt"))
#' get_bad_ct(data, spec, "DISCONFL")
#' get_bad_ct(data, spec, "DISCONFL", na_acceptable = FALSE)
#'
get_bad_ct <- function(data, metacore, var, na_acceptable = NULL){
col_name_str <- as_label(enexpr(var)) %>%
str_remove_all("\"")
Expand Down Expand Up @@ -107,13 +118,17 @@ get_bad_ct <- function(data, metacore, var, na_acceptable = NULL){
#' dataset of interest. If any variable has different codelists for different
#' datasets the metacore object will need to be subsetted using
#' `select_dataset` from the metacore package.
#' @param na_acceptable Logical value, set to `NULL` by default, so the
#' acceptability of missing values is based on if the core for the variable is
#' "Required" in the `metacore` object. If set to `TRUE` then will
#' @param na_acceptable `logical` value or `character` vector, set to `NULL` by default.
#' `NULL` sets the acceptability of missing values based on if the core for
#' the variable is "Required" in the `metacore` object. If set to `TRUE` then will
#' pass check if values are in the control terminology or are missing. If set
#' to `FALSE`then NA will not be acceptable.
#' to `FALSE` then NA will not be acceptable. If set to a `character` vector then
#' only the specified variables may contain NA values.
#' @param omit_vars `character` vector indicating which variables should be skipped
#' when doing the controlled terminology checks. Internally, `omit_vars` is
#' evaluated before `na_acceptable`.
#'
#' @importFrom purrr map_lgl map safely discard
#' @importFrom purrr map_lgl map map2 safely discard
#' @importFrom dplyr filter pull select inner_join
#' @importFrom stringr str_remove
#' @return Given data if all columns pass. It will error otherwise
Expand All @@ -126,8 +141,15 @@ get_bad_ct <- function(data, metacore, var, na_acceptable = NULL){
#' load(metacore_example("pilot_ADaM.rda"))
#' spec <- metacore %>% select_dataset("ADSL")
#' data <- read_xpt(metatools_example("adsl.xpt"))
#'
#' check_ct_data(data, spec)
check_ct_data <- function(data, metacore, na_acceptable = NULL) {
#' \dontrun{
#' # These examples produce errors:
#' check_ct_data(data, spec, na_acceptable = FALSE)
#' check_ct_data(data, spec, na_acceptable = FALSE, omit_vars = "DISCONFL")
#' check_ct_data(data, spec, na_acceptable = c("DSRAEFL", "DCSREAS"), omit_vars = "DISCONFL")
#'}
check_ct_data <- function(data, metacore, na_acceptable = NULL, omit_vars = NULL) {
codes_in_data <- metacore$value_spec %>%
filter(variable %in% names(data), !is.na(code_id)) %>%
pull(code_id) %>%
Expand All @@ -142,13 +164,38 @@ check_ct_data <- function(data, metacore, na_acceptable = NULL) {
filter(variable %in% names(data)) %>%
pull(variable) %>%
unique()

# Subset cols_to_check by omit_vars
if (is.character(omit_vars)) {
check_vars_in_data(omit_vars, "omit_vars", data)
cols_to_check <- setdiff(cols_to_check, omit_vars)
}

# send all variables through check_ct_col
safe_chk <- safely(check_ct_col)
results <- cols_to_check %>%
map(function(x) {
out <- safe_chk(data, metacore, {{ x }}, na_acceptable)
out$error
})

if (is.character(na_acceptable)) {
check_vars_in_data(na_acceptable, "na_acceptable", data)
new_na_acceptable <- rep(FALSE, length(cols_to_check))
new_na_acceptable[match(na_acceptable, cols_to_check)] <- TRUE

results <- map2(cols_to_check, new_na_acceptable, function(x, naac) {
out <- safe_chk(data, metacore, {{ x }}, naac)
out$error
})

} else if(is.logical(na_acceptable) || is.null(na_acceptable)) {
results <- cols_to_check %>%
map(function(x) {
out <- safe_chk(data, metacore, {{ x }}, na_acceptable)
out$error
})

} else {
stop("na_acceptable is not NULL, logical or character.", call. = FALSE)
}


# Write out warning message
test <- map_lgl(results, is.null)
if (all(test)) {
Expand All @@ -174,6 +221,17 @@ check_ct_data <- function(data, metacore, na_acceptable = NULL) {
}
}

check_vars_in_data <- function(vars, vars_name, data) {
if (!all(vars %in% names(data))) {
stop(
paste0(
"Not all variables from ", vars_name, " are in the data: ",
paste0(setdiff(vars, names(data)), collapse = ",")
),
call. = FALSE)
}
return(NULL)
}

#' Check Variable Names
#'
Expand Down
22 changes: 17 additions & 5 deletions man/check_ct_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions man/get_bad_ct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions tests/testthat/test-checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ mod_ds_vars <- spec$ds_vars %>%
spec_mod <- metacore::metacore(spec$ds_spec, mod_ds_vars, spec$var_spec, spec$value_spec, spec$derivations, spec$codelist) %>%
suppressWarnings()

test_that("get_bad_ct works correctly", {

# test na_acceptable
expect_equal(get_bad_ct(data, spec, "DISCONFL"), character(0))
expect_equal(get_bad_ct(data, spec, "DISCONFL", TRUE), character(0))
expect_equal(get_bad_ct(data, spec, "DISCONFL", FALSE), "")

expect_equal(get_bad_ct(data, spec_mod, "DISCONFL"), "")
expect_equal(get_bad_ct(data, spec_mod, "DISCONFL", TRUE), character(0))
expect_equal(get_bad_ct(data, spec_mod, "DISCONFL", FALSE), "")

data_na <- data %>%
mutate(DISCONFL = if_else(dplyr::row_number() == 1, NA_character_, DISCONFL))
expect_equal(get_bad_ct(data_na, spec_mod, "DISCONFL"), c(NA_character_, ""))

})

test_that("check_ct_col works correctly", {
# Check it works with a character col
Expand Down Expand Up @@ -64,6 +80,16 @@ test_that("check_ct_data works correctly", {
"DSRAEFL does not have a unique control term, consider spcificing a dataset")
expect_error(check_ct_data(data, spec_mod))
expect_equal(check_ct_data(data, spec_mod, TRUE), data)

# Check character vector input for na_acceptable:
expect_error(check_ct_data(data, spec, na_acceptable = c("DISCONFL", "DSRAEFL")))
expect_error(check_ct_data(data, spec, 1))

# Check omit_vars:
expect_error(check_ct_data(data, spec, omit_vars = c("A", "B")))
expect_error(check_ct_data(data, spec, FALSE, omit_vars = c("DISCONFL", "DSRAEFL")))
expect_equal(check_ct_data(data, spec_mod, na_acceptable = NULL, omit_vars = "DISCONFL"), data)

})

test_that("variable_check works correctly", {
Expand Down