Skip to content

Commit

Permalink
Update package to support synthea 3.1.0 and 3.2.0 (#178)
Browse files Browse the repository at this point in the history
* Add support for loading synthea v3.1 and v3.2

* Add synthea version to source description

* Address lint errors
  • Loading branch information
burrowse authored Jan 2, 2024
1 parent dfb403d commit 5c13c7f
Show file tree
Hide file tree
Showing 73 changed files with 955 additions and 268 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
Package: ETLSyntheaBuilder
Type: Package
Title: A Builder for Converting the Synthea Data to the OMOP CDM
Version: 1.0
Version: 2.0
mDate: 2021-12-04
Author: Anthony Molinaro [aut, cre],
Clair Blacketer [aut],
Frank DeFalco [aut]
Maintainer: Anthony Molinaro <amolin19@its.jnj.com>
Frank DeFalco [aut],
Evanette Burrows [aut]
Maintainer: Evanette Burrows <eburrow3@its.jnj.com>
Description: ETL and Builder to convert Synthea Data to the OMOP CDM.
Starting with csv files for an OMOP Vocabulary and csv files for Synthea,
this package creates database tables from these csv files and maps them
Expand Down
58 changes: 29 additions & 29 deletions R/CreateCDMTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,41 @@ CreateCDMTables <-
cdmSchema,
cdmVersion,
outputFolder = NULL,
createIndices = FALSE,
createIndices = FALSE,
sqlOnly = FALSE)
{
if (!sqlOnly) {

print("Creating CDM Tables....")

CommonDataModel::executeDdl(
connectionDetails = connectionDetails,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
executeDdl = TRUE,
executePrimaryKey = TRUE,
executeForeignKey = FALSE
) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452
print("Creating CDM Tables....")

print("CDM Tables Created.")

if (createIndices) {

print("Creating Indices on CDM Tables....")
CommonDataModel::executeDdl(
connectionDetails = connectionDetails,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
executeDdl = TRUE,
executePrimaryKey = TRUE,
executeForeignKey = FALSE
) # False for now due to bug: https://github.com/OHDSI/CommonDataModel/issues/452

print("CDM Tables Created.")

if (createIndices) {
print("Creating Indices on CDM Tables....")

indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir())
indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir()
)

indexDDL <-
SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn, indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn,indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

} else {
if (is.null(outputFolder)) {
stop("Must specify an outputFolder location when using sqlOnly = TRUE")
Expand Down
39 changes: 17 additions & 22 deletions R/CreateMapAndRollupTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,42 +19,39 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4" are supported.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
#' @param cdmSourceDescription The description of the source data. Default is generic Synthea description.
#' @param createIndices A boolean that determines whether or not to create indices on CDM tables before the ETL.
#' @param sqlOnly A boolean that determines whether or not to perform the load or generate SQL scripts. Default is FALSE.
#'
#'@export


CreateMapAndRollupTables <- function(connectionDetails,
cdmSchema,
syntheaSchema,
cdmVersion,
syntheaVersion = "2.7.0",
cdmSourceName = "Synthea synthetic health database",
cdmSourceAbbreviation = "Synthea",
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
sqlOnly = FALSE)
cdmSchema,
syntheaSchema,
cdmVersion,
syntheaVersion = "2.7.0",
cdmSourceName = "Synthea synthetic health database",
cdmSourceAbbreviation = "Synthea",
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
sqlOnly = FALSE)
{
# Determine which sql scripts to run based on the given version.
# The path is relative to inst/sql/sql_server.
if (cdmVersion == "5.3") {
sqlFilePath <- "cdm_version/v531"
} else if (cdmVersion == "5.4") {
sqlFilePath <- "cdm_version/v540"
} else {
supportedCDMVersions <- c("5.3", "5.4")

if (!(cdmVersion %in% supportedCDMVersions)) {
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
)

# Create Vocabulary mapping tables
CreateVocabMapTables(connectionDetails, cdmSchema, cdmVersion, sqlOnly)
Expand All @@ -67,5 +64,3 @@ CreateMapAndRollupTables <- function(connectionDetails,
sqlOnly)

}


19 changes: 13 additions & 6 deletions R/CreateSyntheaTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,32 @@
#' Server, this should specify both the database and the schema,
#' so for example 'cdm_instance.dbo'.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are is supported.
#' Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are supported.
#'
#'@export



CreateSyntheaTables <-
function (connectionDetails,
syntheaSchema,
syntheaVersion = "2.7.0")
function(connectionDetails,
syntheaSchema,
syntheaVersion = "2.7.0")
{
if (syntheaVersion == "2.7.0")
sqlFilePath <- "synthea_version/v270"
else if (syntheaVersion == "3.0.0")
sqlFilePath <- "synthea_version/v300"
else if (syntheaVersion == "3.1.0")
sqlFilePath <- "synthea_version/v310"
else if (syntheaVersion == "3.2.0")
sqlFilePath <- "synthea_version/v320"
else
stop("Invalid synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" and \"3.2.0\" are supported."
)

sqlFilename <- paste0(sqlFilePath, "/", "create_synthea_tables.sql")
sqlFilename <-
paste0(sqlFilePath, "/", "create_synthea_tables.sql")

translatedSql <- SqlRender::loadRenderTranslateSql(
sqlFilename = sqlFilename,
Expand Down
41 changes: 21 additions & 20 deletions R/DropSyntheaTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
syntheaTables <- c(
"ALLERGIES",
"CAREPLANS",
"CLAIMS",
"CLAIMS_TRANSACTIONS",
"CLAIMS",
"CLAIMS_TRANSACTIONS",
"CONDITIONS",
"DEVICES",
"ENCOUNTERS",
Expand All @@ -30,33 +30,34 @@ DropSyntheaTables <- function(connectionDetails, syntheaSchema)
"OBSERVATIONS",
"ORGANIZATIONS",
"PATIENTS",
"PAYERS",
"PAYER_TRANSITIONS",
"PAYERS",
"PAYER_TRANSITIONS",
"PROCEDURES",
"PROVIDERS",
"SUPPLIES"
"SUPPLIES"
)

conn <- DatabaseConnector::connect(connectionDetails)
allTables <- DatabaseConnector::getTableNames(conn, syntheaSchema)
tablesToDrop <- allTables[which(allTables %in% syntheaTables)]

if (length(tablesToDrop) > 0) {
writeLines("Dropping Synthea tables...")
sql <-
paste(
"drop table @synthea_schema.",
tablesToDrop,
";",
collapse = "\n",
sep = ""
)
sql <- SqlRender::render(sql, synthea_schema = syntheaSchema)
sql <- SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
DatabaseConnector::executeSql(conn, sql)
writeLines("Dropping Synthea tables...")
sql <-
paste(
"drop table @synthea_schema.",
tablesToDrop,
";",
collapse = "\n",
sep = ""
)
sql <- SqlRender::render(sql, synthea_schema = syntheaSchema)
sql <-
SqlRender::translate(sql, targetDialect = connectionDetails$dbms)
DatabaseConnector::executeSql(conn, sql)
} else {
print(sprintf("No synthea tables to drop in schema %s",syntheaSchema))
print(sprintf("No synthea tables to drop in schema %s", syntheaSchema))
}

on.exit(DatabaseConnector::disconnect(conn))
}
59 changes: 31 additions & 28 deletions R/LoadEventTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4" are supported.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0" and "3.0.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
Expand All @@ -39,7 +39,7 @@ LoadEventTables <- function(connectionDetails,
cdmHolder = "OHDSI",
cdmSourceDescription = "SyntheaTM is a Synthetic Patient Population Simulator. The goal is to output synthetic, realistic (but not real), patient data and associated health records in a variety of formats.",
createIndices = FALSE,
sqlOnly = FALSE)
sqlOnly = FALSE)
{
# Determine which sql scripts to run based on the given version.
# The path is relative to inst/sql/sql_server.
Expand All @@ -51,25 +51,28 @@ LoadEventTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop("Invalid Synthea version specified. Currently \"2.7.0\" and \"3.0.0\" are supported.")
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
)

if (createIndices) {
print("Creating Indices on CDM Tables....")
print("Creating Indices on CDM Tables....")

indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir())
indexSQLFile <- CommonDataModel::writeIndex(
targetDialect = connectionDetails$dbms,
cdmVersion = cdmVersion,
cdmDatabaseSchema = cdmSchema,
outputfolder = tempdir()
)

indexDDL <- SqlRender::readSql(paste0(tempdir(),"/",indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn,indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
indexDDL <- SqlRender::readSql(paste0(tempdir(), "/", indexSQLFile))
conn <- DatabaseConnector::connect(connectionDetails)
DatabaseConnector::executeSql(conn, indexDDL)
DatabaseConnector::disconnect(conn)
print("Index Creation Complete.")
}

if (!sqlOnly) {
Expand Down Expand Up @@ -233,7 +236,7 @@ LoadEventTables <- function(connectionDetails,
cdm_source_name = cdmSourceName,
cdm_source_abbreviation = cdmSourceAbbreviation,
cdm_holder = cdmHolder,
source_description = cdmSourceDescription
source_description = paste("Synthea version: ", syntheaVersion, " ", cdmSourceDescription)
)
runStep(sql, fileQuery)

Expand Down Expand Up @@ -267,24 +270,24 @@ LoadEventTables <- function(connectionDetails,
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema,
synthea_version = syntheaVersion
synthea_version = syntheaVersion
)
runStep(sql, fileQuery)

# cost
if (syntheaVersion == "2.7.0")
fileQuery <- "insert_cost_v270.sql"
else if (syntheaVersion == "3.0.0")
fileQuery <- "insert_cost_v300.sql"
fileQuery <- "insert_cost_v270.sql"
else if (syntheaVersion %in% c("3.0.0", "3.1.0", "3.2.0"))
fileQuery <- "insert_cost_v300.sql"

sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = file.path(sqlFilePath, fileQuery),
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema
)
runStep(sql, fileQuery)
sql <- SqlRender::loadRenderTranslateSql(
sqlFilename = file.path(sqlFilePath, fileQuery),
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema
)
runStep(sql, fileQuery)

if (!sqlOnly) {
DatabaseConnector::disconnect(conn)
Expand Down
Loading

0 comments on commit 5c13c7f

Please sign in to comment.