Skip to content

Commit

Permalink
handling of additional edge cases (#423)
Browse files Browse the repository at this point in the history
* handling of additional edge cases

* use arrange to fix tests

* fixes

* typo

* more tests

* remove dup test
  • Loading branch information
edward-burn authored Mar 25, 2024
1 parent b515853 commit 737c5e7
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 12 deletions.
18 changes: 10 additions & 8 deletions R/cohortTransformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ cohort_collapse <- function(x) {
# TODO do we need to confirm this assumption?

con <- x$src$con
min_start_sql <- dbplyr::sql(glue::glue('max({DBI::dbQuoteIdentifier(con, "cohort_start_date")})'))
min_start_sql <- dbplyr::sql(glue::glue('min({DBI::dbQuoteIdentifier(con, "cohort_start_date")})'))
max_start_sql <- dbplyr::sql(glue::glue('max({DBI::dbQuoteIdentifier(con, "cohort_start_date")})'))
max_end_sql <- dbplyr::sql(glue::glue('max({DBI::dbQuoteIdentifier(con, "cohort_end_date")})'))

x <- x %>%
dplyr::distinct() %>%
dplyr::mutate(dur = !!datediff("cohort_start_date",
"cohort_end_date")) %>%
dplyr::group_by(.data$cohort_definition_id, .data$subject_id, .add = FALSE) %>%
Expand All @@ -24,7 +26,7 @@ cohort_collapse <- function(x) {
dplyr::mutate(
prev_start = dplyr::coalesce(
!!dbplyr::win_over(
min_start_sql,
max_start_sql,
partition = c("cohort_definition_id", "subject_id"),
frame = c(-Inf, -1),
order = c("cohort_start_date", "dur"),
Expand Down Expand Up @@ -53,23 +55,23 @@ cohort_collapse <- function(x) {
x <- x %>%
dplyr::group_by(.data$cohort_definition_id, .data$subject_id, .add = FALSE) %>%
dbplyr::window_order(.data$cohort_definition_id, .data$subject_id,
.data$prev_start) %>%
.data$cohort_start_date, .data$dur) %>%
dplyr::mutate(groups = cumsum(
dplyr::case_when(is.na(.data$prev_start) ~ 0L,
dplyr::case_when(is.na(.data$prev_start) ~ NA,
!is.na(.data$prev_start) &&
.data$prev_start <= .data$cohort_start_date &&
.data$cohort_start_date <= .data$prev_end ~ 0L,
TRUE ~ 1L)
))

x <- x %>%
x <- x %>%
dplyr::mutate(groups = dplyr::if_else(
is.na(.data$prev_start) &&
.data$cohort_end_date < .data$next_start,
is.na(.data$groups) &&
.data$cohort_end_date >= .data$next_start,
0L, .data$groups
))

x <- x |>
x <- x %>%
dplyr::group_by(.data$cohort_definition_id,
.data$subject_id, .data$groups, .add = FALSE) %>%
dplyr::summarize(cohort_start_date = min(.data$cohort_start_date, na.rm = TRUE),
Expand Down
121 changes: 119 additions & 2 deletions tests/testthat/test-db-cohortTransformations.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
test_cohort_collapse <- function(con, write_schema) {
test_cohort_collapse <- function(con, cdm_schema, write_schema) {

cdm <- cdm_from_con(
con = con, cdm_name = "test", cdm_schema = cdm_schema,
write_schema = write_schema
)

# Nuria's examples
cohort_input <- tibble::tribble(
Expand Down Expand Up @@ -88,6 +93,117 @@ test_cohort_collapse <- function(con, write_schema) {

DBI::dbRemoveTable(con, inSchema(write_schema, "tmp_cohort_collapse_input", dbms = dbms(con)))

# another example
ct_test_cohort <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-05-30",
"1956-08-28")))
ct_test_cohort_expected <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28")))
cdm <- insertTable(cdm, name = "ct_test_cohort", table = ct_test_cohort)
expect_equal(CDMConnector:::cohort_collapse(cdm$ct_test_cohort) %>%
dplyr::collect() %>%
dplyr::arrange(cohort_start_date),
ct_test_cohort_expected)

ct_test_cohort <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28",
"1956-05-30")))
ct_test_cohort_expected <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28")))
cdm <- insertTable(cdm, name = "ct_test_cohort", table = ct_test_cohort)
expect_equal(CDMConnector:::cohort_collapse(cdm$ct_test_cohort) %>%
dplyr::collect() %>%
dplyr::arrange(cohort_start_date),
ct_test_cohort_expected)

ct_test_cohort <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28",
"1956-06-10")))
ct_test_cohort_expected <- dplyr::tibble(cohort_definition_id = 1L,
subject_id = 1L,
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28")))
cdm <- insertTable(cdm, name = "ct_test_cohort", table = ct_test_cohort)
expect_equal(CDMConnector:::cohort_collapse(cdm$ct_test_cohort) %>%
dplyr::collect() %>%
dplyr::arrange(cohort_start_date),
ct_test_cohort_expected)

# multiple cohort ids
ct_test_cohort <- dplyr::tibble(cohort_definition_id = c(1L,1L,1L,2L, 2L, 2L),
subject_id = c(1L,1L,1L,2L, 1L, 1L),
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30",
"1956-05-30",
"2005-01-01",
"2000-01-01",
"2000-01-03")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28",
"1956-06-10",
"2005-01-01",
"2000-01-10",
"2000-01-08")))
ct_test_cohort_expected <- dplyr::tibble(cohort_definition_id = c(1L,1L, 2L, 2L),
subject_id = c(1L,1L,2L, 1L),
cohort_start_date = as.Date(c(
"1950-04-18",
"1956-05-30",
"2005-01-01",
"2000-01-01")),
cohort_end_date = as.Date(c(
"1950-05-02",
"1956-08-28",
"2005-01-01",
"2000-01-10")))
cdm <- insertTable(cdm, name = "ct_test_cohort", table = ct_test_cohort)
expect_equal(CDMConnector:::cohort_collapse(cdm$ct_test_cohort) %>%
dplyr::collect() %>%
dplyr::arrange(cohort_start_date),
ct_test_cohort_expected %>%
dplyr::arrange(cohort_start_date))
dropTable(cdm, name = "ct_test_cohort")



# test every case (Allen's interval algebra) for two intervals and two people
intervals <- tibble::tribble(
Expand Down Expand Up @@ -257,10 +373,11 @@ for (dbtype in dbToTest) {
test_that(glue::glue("{dbtype} - cohort_collapse"), {
if (!(dbtype %in% ciTestDbs)) skip_on_ci()
if (dbtype != "duckdb") skip_on_cran() else skip_if_not_installed("duckdb")
cdm_schema <- get_cdm_schema(dbtype)
write_schema <- get_write_schema(dbtype)
con <- get_connection(dbtype)
skip_if(any(write_schema == "") || is.null(con))
test_cohort_collapse(con, write_schema)
test_cohort_collapse(con, cdm_schema, write_schema)
disconnect(con)
})
}
Expand Down
21 changes: 19 additions & 2 deletions tests/testthat/test-db-generateConceptCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ test_generate_concept_cohort_set <- function(con, cdm_schema, write_schema) {
expect_setequal(unique(expected$subject_id), unique(actual$subject_id))
expect_equal(cohortCount(cdm$gibleed),
cohortCount(cdm$gibleed2))
expect_equal(as.integer(sort(cdm$gibleed |> pull("subject_id"))),
as.integer(sort(cdm$gibleed2 |> pull("subject_id"))))
}

# all occurrences (no descendants) ----
Expand Down Expand Up @@ -207,6 +205,25 @@ test_generate_concept_cohort_set <- function(con, cdm_schema, write_schema) {
attr(actual, 'cohort_set') <- attr(expected, 'cohort_set') <- NULL
expect_equal(actual, expected)

# multiple cohort generation ------
cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector"))
cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2",
overwrite = TRUE)

cdm <- generateConceptCohortSet(
cdm = cdm,
conceptSet = list("acetaminophen_1" = 1127433,
"acetaminophen_2" = 1127433),
name = "acetaminophen",
limit = "all",
end = "event_end_date",
overwrite = TRUE
)
# should have two identical cohorts
expect_equal(length(cohortCount(cdm$acetaminophen) %>%
dplyr::select("number_records") |>
dplyr::distinct() |>
dplyr::pull()), 1)

# cohort generation with a cohort subset ------
# create our main cohort of interest
Expand Down

0 comments on commit 737c5e7

Please sign in to comment.