
Commit 79c8944

Merge pull request #52 from OHDSI/develop
V2.0 release candidate
fdefalco authored Apr 23, 2024
2 parents 0f95ce9 + eb374fb commit 79c8944
Showing 56 changed files with 2,095 additions and 549 deletions.
Binary file added .DS_Store
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -9,3 +9,5 @@ docs
_pkgdown.yml
cran-comments.md
^CRAN-RELEASE$
work/*
^CRAN-SUBMISSION$
1 change: 1 addition & 0 deletions .gitignore
@@ -11,4 +11,5 @@
R/TC
statement_*.sql
errorReport.txt
work/*

3 changes: 3 additions & 0 deletions CRAN-SUBMISSION
@@ -0,0 +1,3 @@
Version: 2.0.0
Date: 2024-04-16 20:03:15 UTC
SHA: 53f57cba7a55b79e60ec84baea9399f4ae19743c
28 changes: 18 additions & 10 deletions DESCRIPTION
@@ -1,26 +1,34 @@
Package: Eunomia
Type: Package
Title: A Standard Dataset in the OMOP Common Data Model
Version: 1.0.3
Date: 2024-03-27
Title: Standard Dataset Manager for Observational Medical Outcomes Partnership Common Data Model Sample Datasets
Version: 2.0.0
Date: 2024-04-22
Authors@R: c(
person("Frank", "DeFalco", , "fdefalco@ohdsi.org", role = c("aut", "cre")),
person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")),
person("Anthony", "Sena", , "sena@ohdsi.org", role=c("aut")),
person("Natthawut", "Adulyanukosol", , "na339@unc.edu", role=c("aut")),
person("Star", "Liu", , "sliu197@jhmi.edu", role=c("aut")),
person("Adam", "Black", , "black@ohdsi.org", role = c("aut")),
person("Observational Health Data Science and Informatics", role = c("cph"))
)
Maintainer: Frank DeFalco <fdefalco@ohdsi.org>
Description: A sample dataset in the OMOP (Observational Medical Outcomes Partnership) Common Data Model (CDM) format. The CDM enables uniform storage of observational health care data, and is widely used for health care analytics. 'Eunomia' contains simulated data as well as a subset of the OMOP Vocabulary, and enables testing of additional packages and is used for educational and demonstration purposes.
Description: Facilitates access to sample datasets from the 'EunomiaDatasets' repository (<https://github.com/ohdsi/EunomiaDatasets>).
License: Apache License 2.0
URL: https://github.com/OHDSI/Eunomia
BugReports: https://github.com/OHDSI/Eunomia/issues
Depends:
DatabaseConnector (>= 2.2.0)
Imports:
SqlRender,
RSQLite (> 2.1.1),
readr
readr,
rlang,
RSQLite,
DBI,
arrow,
CommonDataModel
Suggests:
testthat
testthat,
withr,
duckdb,
DatabaseConnector
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.1
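The revised DESCRIPTION reflects the new design: the package no longer ships a dataset but fetches archives from the EunomiaDatasets repository, with DBI/RSQLite replacing DatabaseConnector in Imports. An illustrative sketch of the resulting workflow (not part of this diff; assumes Eunomia 2.0, DBI, and RSQLite are installed and network access is available):

```r
library(Eunomia)

# Download the GiBleed archive if needed and materialize it as a SQLite database file.
# Without EUNOMIA_DATA_FOLDER set, getDatabaseFile() falls back to tempdir() for its cache.
databaseFile <- getDatabaseFile(datasetName = "GiBleed", cdmVersion = "5.3")

# Query the copy with plain DBI/RSQLite -- no Java or DatabaseConnector required
conn <- DBI::dbConnect(RSQLite::SQLite(), databaseFile)
DBI::dbGetQuery(conn, "SELECT COUNT(*) AS n_persons FROM person")
DBI::dbDisconnect(conn)
```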
11 changes: 8 additions & 3 deletions NAMESPACE
@@ -1,11 +1,16 @@
# Generated by roxygen2: do not edit by hand

export(createCohorts)
export(exportToCsv)
export(downloadEunomiaData)
export(exportDataFiles)
export(extractLoadData)
export(getDatabaseFile)
export(getEunomiaConnectionDetails)
import(DatabaseConnector)
export(loadDataFiles)
import(RSQLite)
importFrom(readr,write_csv)
importFrom(readr,read_csv)
importFrom(tools,file_ext)
importFrom(utils,download.file)
importFrom(utils,read.csv)
importFrom(utils,untar)
importFrom(utils,unzip)
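The NAMESPACE now exports the dataset helpers (downloadEunomiaData, getDatabaseFile, extractLoadData, exportDataFiles, loadDataFiles) alongside the backwards-compatible getEunomiaConnectionDetails. A hedged sketch of driving the download step directly, with argument names taken from the getDatabaseFile() implementation later in this commit and a hypothetical cache location:

```r
library(Eunomia)

# Pre-fetch the GiBleed 5.3 archive into a persistent cache folder (hypothetical location)
cacheDir <- file.path(Sys.getenv("HOME"), "eunomia_data")
dir.create(cacheDir, showWarnings = FALSE, recursive = TRUE)
downloadEunomiaData(datasetName = "GiBleed", cdmVersion = "5.3", pathToData = cacheDir)

# Later calls reuse the cached archive instead of downloading it again
databaseFile <- getDatabaseFile("GiBleed", pathToData = cacheDir, overwrite = FALSE)
```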
10 changes: 10 additions & 0 deletions NEWS.md
@@ -1,3 +1,13 @@
Eunomia 2.0
=============
Changes
- Updated package to no longer contain a dataset but rather to facilitate access to sample datasets
  stored in the https://github.com/OHDSI/EunomiaDatasets repository
- Backward compatibility maintained with the getEunomiaConnectionDetails function
- New getDatabaseFile function added
- Embedded sample dataset removed
- Removed dependency on DatabaseConnector and Java

Eunomia 1.0.3
=============

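To make the backward-compatibility note above concrete, here is an illustrative comparison (not part of this diff) of the old DatabaseConnector-based entry point, which still works, and the new DBI-only path:

```r
library(Eunomia)

# Old path, unchanged API: a ConnectionDetails object for DatabaseConnector
connectionDetails <- getEunomiaConnectionDetails()
connection <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::querySql(connection, "SELECT COUNT(*) FROM person;")
DatabaseConnector::disconnect(connection)

# New path in 2.0: a plain database file path, usable with any SQLite client
databaseFile <- getDatabaseFile("GiBleed")
conn <- DBI::dbConnect(RSQLite::SQLite(), databaseFile)
DBI::dbDisconnect(conn)
```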
94 changes: 42 additions & 52 deletions R/Cohorts.R
@@ -1,4 +1,4 @@
# Copyright 2020 Observational Health Data Sciences and Informatics
# Copyright 2023 Observational Health Data Sciences and Informatics
#
# This file is part of Eunomia
#
@@ -14,75 +14,65 @@
# See the License for the specific language governing permissions and
# limitations under the License.


#' Construct cohorts
#'
#' @description
#' Creates a set of predefined cohorts in a cohort table.
#' WARNING: this will delete all existing cohorts in the table!
#' Creates a set of predefined cohorts in a cohort table. WARNING: this will delete all existing
#' cohorts in the table!
#'
#' @param connectionDetails The connection details to connect to the (Eunomia) database.
#' @param cdmDatabaseSchema The name of the database schema holding the CDM data.
#' @param cohortDatabaseSchema The name of the database schema where the cohorts will be written.
#' @param cohortTable The name of the table in the cohortDatabaseSchema where the cohorts
#' will be written.
#' @param cdmDatabaseSchema Deprecated. The cdm must be created in the main schema.
#' @param cohortDatabaseSchema Deprecated. The cohort table will be created in the main schema.
#' @param cohortTable Deprecated. Cohort table will be named "cohort".
#'
#' @return
#' A data frame listing all created cohorts.
#'
#' @examples
#' connectionDetails <- getEunomiaConnectionDetails()
#' createCohorts(connectionDetails)
#'
#' connection <- connect(connectionDetails)
#'
#' sql <- "SELECT COUNT(*)
#' FROM main.cohort
#' WHERE cohort_definition_id = 1;"
#'
#' renderTranslateQuerySql(connection, sql)
#'
#' disconnect(connection)
#'
#' @export
createCohorts <- function(connectionDetails,
cdmDatabaseSchema = "main",
cohortDatabaseSchema = "main",
cohortTable = "cohort") {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))

# Create study cohort table structure:
sql <- SqlRender::loadRenderTranslateSql(sqlFilename = "CreateCohortTable.sql",
packageName = "Eunomia",
dbms = connectionDetails$dbms,
cohort_database_schema = cohortDatabaseSchema,
cohort_table = cohortTable)
DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE)
if (!("ConnectionDetails" %in% class(connectionDetails))) {
stop("connectionDetails is not valid.")
}

if (connectionDetails$dbms != "sqlite") {
stop("createCohorts only supports sqlite")
}

if (cdmDatabaseSchema != "main" || cohortDatabaseSchema != "main") {
stop("sqlite only supports the main schema")
}

if (cohortTable != "cohort") {
warning("The cohortTable argument to createCohorts was deprecated in Eunomia v2.1.0")
}

connection <- DBI::dbConnect(RSQLite::SQLite(), connectionDetails$server())
on.exit(DBI::dbDisconnect(connection))

# Instantiate cohorts:
pathToCsv <- system.file("settings", "CohortsToCreate.csv", package = "Eunomia")
cohortsToCreate <- read.csv(pathToCsv)
for (i in 1:nrow(cohortsToCreate)) {
writeLines(paste("Creating cohort:", cohortsToCreate$name[i]))
sql <- SqlRender::loadRenderTranslateSql(sqlFilename = paste0(cohortsToCreate$name[i], ".sql"),
packageName = "Eunomia",
dbms = connectionDetails$dbms,
cdm_database_schema = cdmDatabaseSchema,
cohort_database_schema = cohortDatabaseSchema,
cohort_table = cohortTable,
cohort_definition_id = cohortsToCreate$cohortId[i])
DatabaseConnector::executeSql(connection, sql)
# Create example cohort table
pathToSql <- system.file("sql", "CreateCohortTable.sql",package = "Eunomia", mustWork = TRUE)
sql <- readChar(pathToSql, file.info(pathToSql)$size)
sql <- gsub("--[a-zA-Z0-9 ]*", "", sql) # remove comments in sql
sql <- strsplit(gsub("\n", " ", sql), ";")[[1]] # remove newlines, split on semicolon
sql <- trimws(sql) # trim white space
sql <- sql[-which(sql == "")] # remove empty lines

for (i in seq_along(sql)) {
DBI::dbExecute(connection, sql[i])
}

# Fetch cohort counts:
sql <- "SELECT cohort_definition_id, COUNT(*) AS count FROM @cohort_database_schema.@cohort_table GROUP BY cohort_definition_id"
counts <- DatabaseConnector::renderTranslateQuerySql(connection,
sql,
cohort_database_schema = cohortDatabaseSchema,
cohort_table = cohortTable,
snakeCaseToCamelCase = TRUE)
counts <- merge(cohortsToCreate, counts, by.x = "cohortId", by.y = "cohortDefinitionId")
writeLines(sprintf("Cohorts created in table %s.%s", cohortDatabaseSchema, cohortTable))
sql <- "SELECT cohort_definition_id, COUNT(*) AS count
FROM main.cohort
GROUP BY cohort_definition_id"
counts <- DBI::dbGetQuery(connection, sql)

cohortsToCreate <- read.csv(system.file("settings", "CohortsToCreate.csv", package = "Eunomia", mustWork = T))
counts <- merge(cohortsToCreate, counts, by.x = "cohortId", by.y = "cohort_definition_id")
writeLines("Cohorts created in table main.cohort")
return(counts)
}
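For reference, a short sketch of the rewritten createCohorts() in use, following the roxygen example above; the returned data frame merges the cohort definitions with their row counts:

```r
library(Eunomia)

connectionDetails <- getEunomiaConnectionDetails()
cohortCounts <- createCohorts(connectionDetails)
print(cohortCounts)

# Inspect one cohort directly with DBI, mirroring the new implementation
conn <- DBI::dbConnect(RSQLite::SQLite(), connectionDetails$server())
DBI::dbGetQuery(conn, "SELECT COUNT(*) FROM main.cohort WHERE cohort_definition_id = 1;")
DBI::dbDisconnect(conn)
```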
156 changes: 138 additions & 18 deletions R/Connection.R
@@ -1,4 +1,4 @@
# Copyright 2020 Observational Health Data Sciences and Informatics
# Copyright 2023 Observational Health Data Sciences and Informatics
#
# This file is part of Eunomia
#
@@ -15,31 +15,151 @@
# limitations under the License.


#' Get Eunomia Connection Details
#' Get Default Eunomia Connection Details
#'
#' @description
#' Creates a copy of the Eunomia database, and provides details for connecting to that copy.
#' Creates a copy of the default (GiBleed) Eunomia database, and provides details for connecting to
#' that copy. This function provides backwards compatibility with prior releases of Eunomia, which
#' shipped the default (GiBleed) dataset.
#'
#' @param databaseFile The path where the database file will be copied to. By default, the database will
#' be copied to a temporary folder, and will be deleted at the end of the R session.
#' @param dbms The target dialect, by default "sqlite".
#'
#' @return
#' A ConnectionDetails object, to be used with the \code{DatabaseConnector} package.
#'
#' @export
getEunomiaConnectionDetails <- function(databaseFile = tempfile(fileext = ".sqlite"), dbms = "sqlite") {

if (interactive() & !("DatabaseConnector" %in% rownames(utils::installed.packages()))) {
message("The DatabaseConnector package is required but not installed.")
if (!isTRUE(utils::askYesNo("Would you like to install DatabaseConnector?"))) {
return(invisible(NULL))
} else {
utils::install.packages("DatabaseConnector")
}
}

datasetLocation <- getDatabaseFile(datasetName = "GiBleed", dbms = dbms, databaseFile = databaseFile)
DatabaseConnector::createConnectionDetails(dbms = dbms, server = datasetLocation)
}

#' Create a copy of a Eunomia dataset
#'
#' @description
#' Creates a copy of a Eunomia database, and returns the path to the new database file.
#' If the dataset does not yet exist on the user's computer it will attempt to download the source data
#' to the path defined by the EUNOMIA_DATA_FOLDER environment variable.
#'
#' @param datasetName The data set name as found on https://github.com/OHDSI/EunomiaDatasets. The
#' data set name corresponds to the folder with the data set ZIP files.
#' @param cdmVersion The OMOP CDM version. This version will appear in the suffix of the data file,
#' for example: <datasetName>_<cdmVersion>.zip. Default: '5.3'
#' @param pathToData The path where the Eunomia data is stored on the file system. By default the
#' value of the environment variable "EUNOMIA_DATA_FOLDER" is used.
#' @param dbms The database system to use. "sqlite" (default) or "duckdb"
#' @param databaseFile The path where the database file will be copied to. By default, the database
#' will be copied to a temporary folder, and will be deleted at the end of the R
#' session.
#' @param inputFormat The format of the files expected in the archive. (csv or parquet)
#' @param verbose Provide additional logging details during execution
#' @param overwrite Remove and replace an existing data set.
#'
#' @return
#' A ConnectionDetails object, to be used with the \code{DatabaseConnector} package.
#' @return The file path to the new Eunomia dataset copy
#' @export
#'
#' @examples
#' connectionDetails <- getEunomiaConnectionDetails()
#' connection <- connect(connectionDetails)
#' querySql(connection, "SELECT COUNT(*) FROM person;")
#' disconnect(connection)
#' \dontrun{
#' conn <- DBI::dbConnect(RSQLite::SQLite(), getDatabaseFile("GiBleed"))
#' DBI::dbDisconnect(conn)
#'
#' @export
getEunomiaConnectionDetails <- function(databaseFile = tempfile(fileext = ".sqlite")) {
extractFolder <- tempdir()
file <- xzfile(system.file("sqlite", "cdm.tar.xz", package = "Eunomia"), open = "rb")
untar(file, exdir = extractFolder)
close(file)
file.rename(from = file.path(extractFolder, "cdm.sqlite"), to = databaseFile)
details <- DatabaseConnector::createConnectionDetails(dbms = "sqlite", server = databaseFile)
return(details)
#' conn <- DBI::dbConnect(duckdb::duckdb(), getDatabaseFile("GiBleed", dbms = "duckdb"))
#' DBI::dbDisconnect(conn, shutdown = TRUE)
#'
#' conn <- DatabaseConnector::connect(dbms = "sqlite", server = getDatabaseFile("GiBleed"))
#' DatabaseConnector::disconnect(conn)
#' }
#'
getDatabaseFile <- function(datasetName,
cdmVersion = "5.3",
pathToData = Sys.getenv("EUNOMIA_DATA_FOLDER"),
dbms = "sqlite",
databaseFile = tempfile(fileext = paste0(".", dbms)),
inputFormat = "csv",
verbose = FALSE,
overwrite = TRUE) {

if (is.null(pathToData) || is.na(pathToData) || pathToData == "") {
pathToData <- tempdir()
}

stopifnot(is.character(dbms), length(dbms) == 1, dbms %in% c("sqlite", "duckdb"))
stopifnot(is.character(cdmVersion), length(cdmVersion) == 1, cdmVersion %in% c("5.3", "5.4"))

if (dbms == "duckdb") {
rlang::check_installed("duckdb")
# duckdb database are tied to a specific version of duckdb until it reaches v1.0
duckdbVersion <- substr(utils::packageVersion("duckdb"), 1, 3)
datasetFileName <- paste0(datasetName, "_", cdmVersion, "_", duckdbVersion, ".", dbms)
} else {
datasetFileName <- paste0(datasetName, "_", cdmVersion, ".", dbms)
}

# cached sqlite or duckdb file to be copied
datasetLocation <- file.path(pathToData, datasetFileName)
datasetAvailable <- file.exists(datasetLocation)
if (datasetAvailable && overwrite) {
if (verbose) {
message("overwrite specified, deleting existing dataset: ", datasetLocation, appendLF = TRUE)
}
unlink(datasetLocation)
datasetAvailable <- FALSE
}

if (verbose) {
message("dataset: ",datasetLocation, " available: ",datasetAvailable, appendLF = TRUE)
}

# zip archive of csv source files
archiveName <- paste0(datasetName, "_", cdmVersion, ".zip")
archiveLocation <- file.path(pathToData, archiveName)
archiveAvailable <- file.exists(archiveLocation)

if (archiveAvailable && overwrite) {
if (verbose) {
message("overwrite specified, deleting existing archive: ", archiveLocation, appendLF = TRUE)
}
unlink(archiveLocation)
archiveAvailable <- FALSE
}

if (verbose) {
message("archive: ",archiveLocation," available:",archiveAvailable,appendLF = TRUE)
}

if (!datasetAvailable && !archiveAvailable) {
message(paste("attempting to download", datasetName))
downloadedData <- downloadEunomiaData(datasetName = datasetName, cdmVersion = cdmVersion, pathToData = pathToData, verbose=verbose)
if (verbose) {
message("downloaded: ",downloadedData,appendLF = TRUE)
}
archiveAvailable <- TRUE
}

if (!datasetAvailable && archiveAvailable) {
message("attempting to extract and load: ", archiveLocation," to: ",datasetLocation,appendLF = TRUE)
extractLoadData(from = archiveLocation, to = datasetLocation, dbms = dbms, cdmVersion = cdmVersion, inputFormat=inputFormat, verbose=verbose)
datasetAvailable <- TRUE
}

if (verbose) {
message("copying: ",datasetLocation," to: ", databaseFile, appendLF = TRUE)
}

copySuccess <- file.copy(from = datasetLocation, to = databaseFile, overwrite = overwrite)
if (isFALSE(copySuccess)) {
stop(paste("File copy from", datasetLocation, "to", databaseFile, "failed!"))
}
invisible(databaseFile)
}
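A sketch exercising the duckdb branch and the caching logic above (not part of this diff; assumes the suggested duckdb package is installed, the cached file name gains a duckdb version suffix, and overwrite = FALSE reuses it on later calls):

```r
library(Eunomia)

dbFile <- getDatabaseFile(
  datasetName = "GiBleed",
  cdmVersion  = "5.3",
  dbms        = "duckdb",
  overwrite   = FALSE,  # keep and reuse a previously built .duckdb file
  verbose     = TRUE    # print the cache / download / extract decisions
)

conn <- DBI::dbConnect(duckdb::duckdb(), dbFile)
DBI::dbGetQuery(conn, "SELECT COUNT(*) FROM person")
DBI::dbDisconnect(conn, shutdown = TRUE)
```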