Skip to content

Commit

Permalink
Creating Queries That Return Cohort Information (#61)
Browse files Browse the repository at this point in the history
* added GetCohortSubjects

* updated runtests.jl

* Added additional test dependencies

* Fixed up imports and generating initial cohort

* Added test cohort definition asset

* added tests for GetCohortSubjects

* corrected

* added GetCohortSubjectStartDate

* test suite for GetCohortSubjectStartDate

* added a dispatch for GetDatabaseCohorts

* added GetCohortSubjectEndDate

* Added notes and fixed a docstring error

* requested changes made

* added requested changes

* added requested changes

* Small fix to return unique IDs and doc updates

* Fixed export signature

---------

Co-authored-by: TheCedarPrince <jacobszelko@gmail.com>
  • Loading branch information
Jay-sanjay and TheCedarPrince authored Nov 24, 2023
1 parent 5a0a09a commit 3200fd1
Show file tree
Hide file tree
Showing 7 changed files with 538 additions and 6 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ New minor release to introduce some new functionalities and novel extensions of

* GetDrugAmounts

- New functions for creating Cohort queries:

* GetDatabaseCohorts

* GetCohortSubjects

* GetCohortSubjectStartDate

* GetCohortSubjectEndDate


- Extensive test suite for new features

### Changed
Expand Down
4 changes: 4 additions & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ GetDatabaseYearRange
GetDrugExposureIDs
GetDrugConceptIDs
GetDrugAmounts
GetDatabaseCohorts
GetCohortSubjects
GetCohortSubjectStartDate
GetCohortSubjectEndDate
GetVisitProcedure
```

Expand Down
323 changes: 322 additions & 1 deletion src/getters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1564,4 +1564,325 @@ function GetVisitProcedure(

end

export GetDatabasePersonIDs, GetPatientState, GetPatientGender, GetPatientRace, GetPatientAgeGroup, GetPatientVisits, GetMostRecentConditions, GetMostRecentVisit, GetVisitCondition, GetPatientEthnicity, GetDatabaseYearRange, GetVisitPlaceOfService, GetVisitConcept, GetVisitDate, GetDrugExposureIDs, GetDrugConceptIDs, GetDrugAmounts, GetVisitProcedure
"""
    GetCohortSubjects(cohort_ids, conn; tab = cohort)

Look up the subjects that belong to each of the given cohorts by running the query
produced by `GetCohortSubjects(cohort_ids; tab = tab)` against `conn`.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - a two column `DataFrame` comprised of columns: `:cohort_definition_id` and `:subject_id`
"""
function GetCohortSubjects(cohort_ids, conn; tab = cohort)
    # The connection-free method renders the SQL text; this method only executes it.
    query = GetCohortSubjects(cohort_ids; tab = tab)

    return DataFrame(DBInterface.execute(conn, query))
end

"""
    GetCohortSubjects(df::DataFrame, conn; tab = cohort)

Given a `DataFrame` with a `:cohort_definition_id` column, return the `DataFrame` with an
associated `:subject_id` for each `cohort_definition_id` in the `DataFrame`.

Multiple dispatch that accepts all other arguments like in
`GetCohortSubjects(ids, conn; tab = cohort)`.

# Arguments:

- `df` - a `DataFrame` containing a `cohort_definition_id` column

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - the outer join, on `:cohort_definition_id`, of the queried
  `:cohort_definition_id`/`:subject_id` pairs with the input `df`
"""
function GetCohortSubjects(df::DataFrame, conn; tab = cohort)
    # Repeated IDs select the same rows, so only distinct values are sent to the query.
    ids = unique(df[:, "cohort_definition_id"])
    subjects = GetCohortSubjects(ids, conn; tab = tab)

    return outerjoin(subjects, df, on = :cohort_definition_id)
end

"""
    GetCohortSubjects(cohort_ids; tab = cohort)

Produces SQL statement that, given a list of `cohort_id`'s, finds the subjects associated
with those cohorts.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `sql::String` - the rendered SQL statement selecting `cohort_definition_id` and
  `subject_id` from `tab`
"""
function GetCohortSubjects(cohort_ids; tab = cohort)
    # Keep only rows whose cohort is one of the requested IDs.
    in_requested_cohorts = Fun.in(Get.cohort_definition_id, cohort_ids...)

    query =
        From(tab) |>
        Where(in_requested_cohorts) |>
        Select(Get.cohort_definition_id, Get.subject_id)

    # `dialect` is not defined in this block; presumably a module-level setting.
    return String(render(query, dialect = dialect))
end

"""
    GetCohortSubjectStartDate(cohort_ids, subject_ids, conn; tab = cohort)

Given a single or list of cohort IDs and subject IDs, return their start dates by running
the query produced by `GetCohortSubjectStartDate(cohort_ids, subject_ids; tab = tab)`
against `conn`.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

- `subject_ids` - list of `subject_id`'s; each ID must be of subtype `Float64`

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - a three column `DataFrame` comprised of columns: `:cohort_definition_id`, `:subject_id` and `:cohort_start_date`
"""
function GetCohortSubjectStartDate(cohort_ids, subject_ids, conn; tab = cohort)
    # The connection-free method renders the SQL text; this method only executes it.
    query = GetCohortSubjectStartDate(cohort_ids, subject_ids; tab = tab)

    return DataFrame(DBInterface.execute(conn, query))
end

"""
    GetCohortSubjectStartDate(df::DataFrame, conn; tab = cohort)

Given a `DataFrame` with a `:cohort_definition_id` column and `:subject_id` column, return
the `DataFrame` with an associated `:cohort_start_date` corresponding to a cohort's subject
ID in the `DataFrame`.

Multiple dispatch that accepts all other arguments like in
`GetCohortSubjectStartDate(cohort_ids, subject_ids, conn; tab = cohort)`.

# Arguments:

- `df` - a `DataFrame` containing `cohort_definition_id` and `subject_id` columns

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - the outer join, on `:cohort_definition_id` and `:subject_id`, of the
  queried start dates with the input `df`. Because the query filters cohorts and subjects
  independently, cohort/subject pairs found in the table but absent from the input also
  appear as rows.
"""
function GetCohortSubjectStartDate(df::DataFrame, conn; tab = cohort)
    # Repeated IDs select the same rows, so only distinct values are sent to the query.
    cohort_ids = unique(df[:, "cohort_definition_id"])
    subject_ids = unique(df[:, "subject_id"])

    start_dates = GetCohortSubjectStartDate(cohort_ids, subject_ids, conn; tab = tab)

    # Both frames carry `subject_id`; joining on `:cohort_definition_id` alone leaves it
    # as a duplicated non-key column, which `outerjoin` rejects. Match on the full pair.
    return outerjoin(start_dates, df, on = [:cohort_definition_id, :subject_id])
end

"""
    GetCohortSubjectStartDate(cohort_ids, subject_ids; tab = cohort)

Produces SQL statement that, given a list of cohort IDs and subject IDs, finds their
start dates.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

- `subject_ids` - list of `subject_id`'s; each ID must be of subtype `Float64`

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `sql::String` - the rendered SQL statement selecting `cohort_definition_id`,
  `subject_id` and `cohort_start_date` from `tab`
"""
function GetCohortSubjectStartDate(cohort_ids, subject_ids; tab = cohort)
    # The two filters are applied one after the other, so a row must match both lists.
    in_requested_cohorts = Fun.in(Get.cohort_definition_id, cohort_ids...)
    in_requested_subjects = Fun.in(Get.subject_id, subject_ids...)

    query =
        From(tab) |>
        Where(in_requested_cohorts) |>
        Where(in_requested_subjects) |>
        Select(Get.cohort_definition_id, Get.subject_id, Get.cohort_start_date)

    # `dialect` is not defined in this block; presumably a module-level setting.
    return String(render(query, dialect = dialect))
end

"""
    GetCohortSubjectEndDate(cohort_ids, subject_ids, conn; tab = cohort)

Given a list of cohort IDs and subject IDs, return their end dates by running the query
produced by `GetCohortSubjectEndDate(cohort_ids, subject_ids; tab = tab)` against `conn`.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

- `subject_ids` - list of `subject_id`'s; each ID must be of subtype `Float64`

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - a three column `DataFrame` comprised of columns: `:cohort_definition_id`, `:subject_id` and `:cohort_end_date`
"""
function GetCohortSubjectEndDate(cohort_ids, subject_ids, conn; tab = cohort)
    # The connection-free method renders the SQL text; this method only executes it.
    query = GetCohortSubjectEndDate(cohort_ids, subject_ids; tab = tab)

    return DataFrame(DBInterface.execute(conn, query))
end

"""
    GetCohortSubjectEndDate(df::DataFrame, conn; tab = cohort)

Given a `DataFrame` with a `:cohort_definition_id` column and `:subject_id` column, return
the `DataFrame` with an associated `:cohort_end_date` corresponding to a given
`cohort_definition_id` and `subject_id` in the `DataFrame`.

Multiple dispatch that accepts all other arguments like in
`GetCohortSubjectEndDate(cohort_ids, subject_ids, conn; tab = cohort)`.

# Arguments:

- `df` - a `DataFrame` containing `cohort_definition_id` and `subject_id` columns

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `df::DataFrame` - the outer join, on `:cohort_definition_id` and `:subject_id`, of the
  queried end dates with the input `df`. Because the query filters cohorts and subjects
  independently, cohort/subject pairs found in the table but absent from the input also
  appear as rows.
"""
function GetCohortSubjectEndDate(df::DataFrame, conn; tab = cohort)
    # Repeated IDs select the same rows, so only distinct values are sent to the query.
    cohort_ids = unique(df[:, "cohort_definition_id"])
    subject_ids = unique(df[:, "subject_id"])

    end_dates = GetCohortSubjectEndDate(cohort_ids, subject_ids, conn; tab = tab)

    # Both frames carry `subject_id`; joining on `:cohort_definition_id` alone leaves it
    # as a duplicated non-key column, which `outerjoin` rejects. Match on the full pair.
    return outerjoin(end_dates, df, on = [:cohort_definition_id, :subject_id])
end

"""
    GetCohortSubjectEndDate(cohort_ids, subject_ids; tab = cohort)

Produces SQL statement that, given a list of cohort IDs and subject IDs, finds their
end dates.

# Arguments:

- `cohort_ids` - list of `cohort_id`'s; each ID must be of subtype `Float64`

- `subject_ids` - list of `subject_id`'s; each ID must be of subtype `Float64`

# Keyword Arguments:

- `tab` - the `SQLTable` representing the `cohort` table; default `cohort`

# Returns

- `sql::String` - the rendered SQL statement selecting `cohort_definition_id`,
  `subject_id` and `cohort_end_date` from `tab`
"""
function GetCohortSubjectEndDate(cohort_ids, subject_ids; tab = cohort)
    # The two filters are applied one after the other, so a row must match both lists.
    in_requested_cohorts = Fun.in(Get.cohort_definition_id, cohort_ids...)
    in_requested_subjects = Fun.in(Get.subject_id, subject_ids...)

    query =
        From(tab) |>
        Where(in_requested_cohorts) |>
        Where(in_requested_subjects) |>
        Select(Get.cohort_definition_id, Get.subject_id, Get.cohort_end_date)

    # `dialect` is not defined in this block; presumably a module-level setting.
    return String(render(query, dialect = dialect))
end

"""
    GetDatabaseCohorts(conn; tab = cohort)

Return all unique `cohort_definition_id`'s found in a database's cohort table by running
the query produced by `GetDatabaseCohorts(tab = tab)` against `conn`.

# Arguments:

- `conn` - database connection using DBInterface

# Keyword Arguments:

- `tab` - the `SQLTable` representing the Cohort table; default `cohort`

# Returns

- `ids::Vector{Int}` - the values of the `cohort_definition_id` column of the query result
"""
function GetDatabaseCohorts(conn; tab = cohort)
    query = GetDatabaseCohorts(tab = tab)
    result = DataFrame(DBInterface.execute(conn, query))

    # Callers receive a plain integer vector rather than the one-column table.
    return convert(Vector{Int}, result.cohort_definition_id)
end

"""
    GetDatabaseCohorts(; tab = cohort)

Produces SQL statement that, given a cohort table, finds all unique
`cohort_definition_id`'s it contains.

# Keyword Arguments:

- `tab` - the `SQLTable` representing the Cohort table; default `cohort`

# Returns

- `sql::String` - the rendered SQL statement selecting `cohort_definition_id` from `tab`,
  grouped by `cohort_definition_id`
"""
function GetDatabaseCohorts(; tab = cohort)
    # Grouping by the ID column collapses repeated IDs into a single row each.
    # `dialect` is not defined in this block; presumably a module-level setting.
    sql =
        From(tab) |>
        Group(Get.cohort_definition_id) |>
        Select(Get.cohort_definition_id) |>
        q -> render(q, dialect = dialect)

    return String(sql)
end

export GetDatabasePersonIDs, GetPatientState, GetPatientGender, GetPatientRace, GetPatientAgeGroup, GetPatientVisits, GetMostRecentConditions, GetMostRecentVisit, GetVisitCondition, GetPatientEthnicity, GetDatabaseYearRange, GetVisitPlaceOfService, GetVisitConcept, GetVisitDate, GetDrugExposures, GetDrugConceptIDs, GetDrugAmounts, GetVisitProcedure, GetDatabaseCohorts, GetCohortSubjects, GetCohortSubjectStartDate, GetCohortSubjectEndDate, GetDrugExposureIDs
4 changes: 3 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
FunSQL = "cf6cc811-59f4-4a10-b258-a8547a8f6407"
HealthSampleData = "b8464e9a-ae38-46a4-977b-86f00930f698"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
OHDSICohortExpressions = "b1469e29-13af-4897-a4a9-3c363a260582"
OMOPCDMCohortCreator = "f525a15e-a73f-4eef-870f-f901257eae22"
SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand All @@ -13,7 +15,7 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
DBInterface = "2"
DataFrames = "1"
FunSQL = "0.10"
HealthSampleData = "0"
SQLite = "1"
TimeZones = "1"
HealthSampleData = "0"
julia = "1.7"
Loading

0 comments on commit 3200fd1

Please sign in to comment.