Skip to content

Commit

Permalink
Merge pull request #223 from Sage-Bionetworks/fds-1213-rfc-data-model
Browse files Browse the repository at this point in the history
FDS-1213 Integrate new data model into Data Flow App
  • Loading branch information
lakikowolfe authored Oct 31, 2023
2 parents 3c2b34f + fa26ee6 commit 4db0727
Show file tree
Hide file tree
Showing 5 changed files with 365 additions and 124 deletions.
88 changes: 15 additions & 73 deletions R/app_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ app_server <- function( input, output, session ) {
# Component = NA),
icon = mod_select_dcc_out()$selected_dcc_config$icon,
na_replace = list(num_items = "No Manifest",
release_scheduled = "Not Scheduled",
embargo = "No Embargo",
dataset = "No Manifest"),
scheduled_release_date = "Not Scheduled",
dataset_type = "No Manifest"),
base_url = schematic_api_url)
})

Expand All @@ -95,58 +94,16 @@ app_server <- function( input, output, session ) {
config = dash_config_react())
})


# output$tstPrint <- renderPrint({
# df_manifest_react()
# })

# PREPARE MANIFEST FOR DASH ###########################################################

# add status to manifest
manifest_w_status <- shiny::reactive({
manifest_dfa <- df_manifest_react()

# add some columns to manifest to make logic easier
manifest <- manifest_dfa %>%
dplyr::mutate(scheduled = !is.na(release_scheduled),
no_embargo = is.na(embargo) || embargo < Sys.Date(),
past_due = !is.na(release_scheduled) && release_scheduled < Sys.Date())

# generate status variable based on some logic that defines various data flow statuses
status <- sapply(1:nrow(manifest), function(i) {
row <- manifest[i, ]

if (row$scheduled == FALSE) {
status <- "not scheduled"
} else if (row$no_embargo == FALSE || row$standard_compliance == FALSE) {
status <- "quarantine"
} else if (row$no_embargo == TRUE & row$standard_compliance == TRUE & row$released == FALSE) {
status <- "quarantine (ready for release)"
} else if (row$released == TRUE) {
status <- "released"
} else {
NA
}

status
})

# add status to manifest
manifest$data_flow_status <- status

manifest
})

# FILTER MANIFEST FOR DASH UI ###########################################################

# prepare inputs for filter module
filter_inputs <- shiny::reactive({

contributor_choices <- unique(manifest_w_status()$contributor)
dataset_choices <- unique(manifest_w_status()$dataset)
release_daterange_start <- min(manifest_w_status()$release_scheduled, na.rm = TRUE)
release_daterange_end <- max(manifest_w_status()$release_scheduled, na.rm = TRUE)
status_choices <- unique(manifest_w_status()$data_flow_status)
contributor_choices <- unique(df_manifest_react()$contributor)
dataset_choices <- unique(df_manifest_react()$dataset_type)
release_daterange_start <- min(df_manifest_react()$scheduled_release_date, na.rm = TRUE)
release_daterange_end <- max(df_manifest_react()$scheduled_release_date, na.rm = TRUE)
status_choices <- unique(df_manifest_react()$status)

list(contributor_choices,
dataset_choices,
Expand All @@ -166,7 +123,7 @@ app_server <- function( input, output, session ) {

# FILTER MANIFEST FOR DASH SERVER ####################################################
filtered_manifest <- dfamodules::mod_datatable_filters_server("datatable_filters_1",
manifest_w_status)
df_manifest_react)


# DATASET DASH #######################################################################
Expand All @@ -187,7 +144,7 @@ app_server <- function( input, output, session ) {

dfamodules::mod_distribution_server(id = "distribution_datatype",
df = filtered_manifest,
group_by_var = "dataset",
group_by_var = "dataset_type",
title = NULL,
x_lab = "Type of dataset",
y_lab = "Number of Datasets",
Expand Down Expand Up @@ -276,21 +233,21 @@ app_server <- function( input, output, session ) {
})

# STORAGE PROJECT SELECTION

# have to capture in a reactive or else it will not work in select storage module
# FIXME: Convert to reactive value?
reactive_asset_view <- reactive({
mod_select_dcc_out()$selected_dcc_config$synapse_asset_view
})

reactive_manifest_id <- reactive({
mod_select_dcc_out()$selected_dcc_config$manifest_dataset_id
})

reactive_schema_url <- reactive({
mod_select_dcc_out()$selected_dcc_config$schema_url
})

mod_select_storage_project_out <- dfamodules::mod_select_storage_project_server(
id = "select_storage_project_1",
asset_view = reactive_asset_view,
Expand Down Expand Up @@ -339,31 +296,16 @@ app_server <- function( input, output, session ) {
dash_config_react())
})

# DISPLAY MANIFEST
admin_display_manifest <- shiny::reactive({

# rearrange manifest so it's more readable
manifest <- dfamodules::rearrange_dataframe(manifest_submit(),
names(dash_config_react()))

# make columns factors
factor_cols <- dfamodules::get_colname_by_type(dash_config_react(), type = "drop_down_filter")
manifest[factor_cols] <- lapply(manifest[,factor_cols], factor)

# return
manifest
})

# get names of selected datasets
selected_row_names <- shiny::reactive({
dataset_selection()$id

})

dfamodules::mod_highlight_datatable_server("highlight_datatable_1",
admin_display_manifest,
manifest_submit,
selected_row_names,
"entityId")
"dataset_id")

# SUBMIT MODEL TO SYNAPSE
# make sure to submit using a manifest that has been run through date to string
Expand Down
3 changes: 2 additions & 1 deletion R/global.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# READ IN CONFIG
dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example/dcc_config.csv",
dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example-rfc-model/dcc_config.csv",
show_col_types = FALSE)


# TESTING FOR ONE DCC
# dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example/dcc_config.csv",
# show_col_types = FALSE)
Expand Down
25 changes: 14 additions & 11 deletions inst/data_model/dataflow_component.csv
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules
dataFlow,Describes data flow status,,"Component, contributor, entityId, dataset_name, dataset, num_items, release_scheduled, embargo, standard_compliance, data_portal, released",,TRUE,DataType,,,
contributor,Storage project that dataset is contained in,,,,TRUE,DataProperty,,,str
entityId,Synapse unique ID,,,,TRUE,DataProperty,,,str
dataset_name,Unique dataset name,,,,TRUE,DataProperty,,,str
dataset,Type of dataset,,,,TRUE,DataProperty,,,str
dataFlow,Describes data flow status,,"Component, contributor, dataset_id, dataset_name, dataset_type, upload_date, num_items, dataset_size, scheduled_release_date, release_date, status, released_destinations, released, metadata_check, governance_compliance",,TRUE,DataType,,,
contributor,"The team of a contributing user. The team reflects the contributing institution in standard DCC setups. Note that for DCCs picking option 1 in the standard DCC setup, the Synapse project name also reflects the contributing institution and can be used instead of the team name.",,,,TRUE,DataProperty,,,str
dataset_id,Synapse ID of manifest folder,,,,TRUE,DataProperty,,,str
dataset_name,Name of dataset,,,,TRUE,DataProperty,,,str
dataset_type,Type of dataset,,,,TRUE,DataProperty,,,str
upload_date,Date when both data records and data files (if applicable) have been uploaded,,,,TRUE,DataProperty,,,date
num_items,Number of files contained in a manifest,,,,TRUE,DataProperty,,,int
release_scheduled,Date that a dataset is scheduled for release,,,,TRUE,DataProperty,,,date
embargo,Date that indicates when a dataset is released from embargo,,,,TRUE,DataProperty,,,date
standard_compliance,Has dataset passed standard validation checks,"TRUE, FALSE",,,TRUE,DataProperty,,,str
data_portal,"Has dataset been published to data portal
dataset_size,GB of data in a dataset,,,,TRUE,DataProperty,,,int
scheduled_release_date,Date that a dataset is scheduled for release,,,,TRUE,DataProperty,,,date
release_date,Date when a given dataset has actually been released,,,,TRUE,DataProperty,,,date
status,Data flow status of a dataset,"Not uploaded, uploaded, curated, quarantine, preprocessing, scheduled for release, ready for release, released",,,TRUE,DataProperty,,,str
released_destinations,Location that a dataset has been released to,"Not applicable, data portal, dbGaP",,,TRUE,DataProperty,,,str
released,"Whether or not a dataset has been released
","TRUE, FALSE",,,TRUE,DataProperty,,,str
released,"Has this dataset been released
","TRUE, FALSE",,,TRUE,DataProperty,,,str
metadata_check,Whether or not a dataset has passed DCA/Schematic metadata checks,"TRUE, FALSE",,,TRUE,DataProperty,,,str
governance_compliance,Whether or not all files in a manifest have passed governance requirements,"TRUE, FALSE",,,TRUE,DataProperty,,,str
Loading

0 comments on commit 4db0727

Please sign in to comment.