diff --git a/R/app_server.R b/R/app_server.R index 1cd4674..f18e586 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -67,9 +67,8 @@ app_server <- function( input, output, session ) { # Component = NA), icon = mod_select_dcc_out()$selected_dcc_config$icon, na_replace = list(num_items = "No Manifest", - release_scheduled = "Not Scheduled", - embargo = "No Embargo", - dataset = "No Manifest"), + scheduled_release_date = "Not Scheduled", + dataset_type = "No Manifest"), base_url = schematic_api_url) }) @@ -95,58 +94,16 @@ app_server <- function( input, output, session ) { config = dash_config_react()) }) - - # output$tstPrint <- renderPrint({ - # df_manifest_react() - # }) - - # PREPARE MANIFEST FOR DASH ########################################################### - - # add status to manifest - manifest_w_status <- shiny::reactive({ - manifest_dfa <- df_manifest_react() - - # add some columns to manifest to make logic easier - manifest <- manifest_dfa %>% - dplyr::mutate(scheduled = !is.na(release_scheduled), - no_embargo = is.na(embargo) || embargo < Sys.Date(), - past_due = !is.na(release_scheduled) && release_scheduled < Sys.Date()) - - # generate status variable based on some logic that defines various data flow statuses - status <- sapply(1:nrow(manifest), function(i) { - row <- manifest[i, ] - - if (row$scheduled == FALSE) { - status <- "not scheduled" - } else if (row$no_embargo == FALSE || row$standard_compliance == FALSE) { - status <- "quarantine" - } else if (row$no_embargo == TRUE & row$standard_compliance == TRUE & row$released == FALSE) { - status <- "quarantine (ready for release)" - } else if (row$released == TRUE) { - status <- "released" - } else { - NA - } - - status - }) - - # add status to manifest - manifest$data_flow_status <- status - - manifest - }) - # FILTER MANIFEST FOR DASH UI ########################################################### # prepare inputs for filter module filter_inputs <- shiny::reactive({ - contributor_choices <- unique(manifest_w_status()$contributor) - dataset_choices <- unique(manifest_w_status()$dataset) - release_daterange_start <- min(manifest_w_status()$release_scheduled, na.rm = TRUE) - release_daterange_end <- max(manifest_w_status()$release_scheduled, na.rm = TRUE) - status_choices <- unique(manifest_w_status()$data_flow_status) + contributor_choices <- unique(df_manifest_react()$contributor) + dataset_choices <- unique(df_manifest_react()$dataset_type) + release_daterange_start <- min(df_manifest_react()$scheduled_release_date, na.rm = TRUE) + release_daterange_end <- max(df_manifest_react()$scheduled_release_date, na.rm = TRUE) + status_choices <- unique(df_manifest_react()$status) list(contributor_choices, dataset_choices, @@ -166,7 +123,7 @@ app_server <- function( input, output, session ) { # FILTER MANIFEST FOR DASH SERVER #################################################### filtered_manifest <- dfamodules::mod_datatable_filters_server("datatable_filters_1", - manifest_w_status) + df_manifest_react) # DATASET DASH ####################################################################### @@ -187,7 +144,7 @@ app_server <- function( input, output, session ) { dfamodules::mod_distribution_server(id = "distribution_datatype", df = filtered_manifest, - group_by_var = "dataset", + group_by_var = "dataset_type", title = NULL, x_lab = "Type of dataset", y_lab = "Number of Datasets", @@ -276,21 +233,21 @@ app_server <- function( input, output, session ) { }) # STORAGE PROJECT SELECTION - + # have to capture in a reactive or else it will not work in select storage module # FIXME: Convert to reactive value? reactive_asset_view <- reactive({ mod_select_dcc_out()$selected_dcc_config$synapse_asset_view }) - + reactive_manifest_id <- reactive({ mod_select_dcc_out()$selected_dcc_config$manifest_dataset_id }) - + reactive_schema_url <- reactive({ mod_select_dcc_out()$selected_dcc_config$schema_url }) - + mod_select_storage_project_out <- dfamodules::mod_select_storage_project_server( id = "select_storage_project_1", asset_view = reactive_asset_view, @@ -339,21 +296,6 @@ app_server <- function( input, output, session ) { dash_config_react()) }) - # DISPLAY MANIFEST - admin_display_manifest <- shiny::reactive({ - - # rearrange manifest so it's more readable - manifest <- dfamodules::rearrange_dataframe(manifest_submit(), - names(dash_config_react())) - - # make columns factors - factor_cols <- dfamodules::get_colname_by_type(dash_config_react(), type = "drop_down_filter") - manifest[factor_cols] <- lapply(manifest[,factor_cols], factor) - - # return - manifest - }) - # get names of selected datasets selected_row_names <- shiny::reactive({ dataset_selection()$id @@ -361,9 +303,9 @@ app_server <- function( input, output, session ) { }) dfamodules::mod_highlight_datatable_server("highlight_datatable_1", - admin_display_manifest, + manifest_submit, selected_row_names, - "entityId") + "dataset_id") # SUBMIT MODEL TO SYNAPSE # make sure to submit using a manifest that has been run through date to string diff --git a/R/global.R b/R/global.R index 615b255..b3b325c 100644 --- a/R/global.R +++ b/R/global.R @@ -1,7 +1,8 @@ # READ IN CONFIG -dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example/dcc_config.csv", +dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example-rfc-model/dcc_config.csv", show_col_types = FALSE) + # TESTING FOR ONE DCC # dcc_config <- readr::read_csv("https://raw.githubusercontent.com/Sage-Bionetworks/data_flow_config/example/dcc_config.csv", # show_col_types = FALSE) diff --git a/inst/data_model/dataflow_component.csv b/inst/data_model/dataflow_component.csv index a2b5e60..6ee39e5 100644 --- a/inst/data_model/dataflow_component.csv +++ b/inst/data_model/dataflow_component.csv @@ -1,14 +1,17 @@ Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules -dataFlow,Describes data flow status,,"Component, contributor, entityId, dataset_name, dataset, num_items, release_scheduled, embargo, standard_compliance, data_portal, released",,TRUE,DataType,,, -contributor,Storage project that dataset is contained in,,,,TRUE,DataProperty,,,str -entityId,Synapse unique ID,,,,TRUE,DataProperty,,,str -dataset_name,Unique dataset name,,,,TRUE,DataProperty,,,str -dataset,Type of dataset,,,,TRUE,DataProperty,,,str +dataFlow,Describes data flow status,,"Component, contributor, dataset_id, dataset_name, dataset_type, upload_date, num_items, dataset_size, scheduled_release_date, release_date, status, released_destinations, released, metadata_check, governance_compliance",,TRUE,DataType,,, +contributor,"The team of a contributing user. The team reflects the contributing institution in standard DCC setups).  Note that for DCCs picking option 1 in the standard DCC setup, the Synapse project name also reflects the contributing institutions and can be used instead of the team name.",,,,TRUE,DataProperty,,,str +dataset_id,Synapse ID of manifest folder,,,,TRUE,DataProperty,,,str +dataset_name,Name of dataset,,,,TRUE,DataProperty,,,str +dataset_type,Type of dataset,,,,TRUE,DataProperty,,,str +upload_date,Date when both data records and data files (if applicable) have been uploaded,,,,TRUE,DataProperty,,,date num_items,Number of files contained in a manifest,,,,TRUE,DataProperty,,,int -release_scheduled,Date that a dataset is scheduled for release,,,,TRUE,DataProperty,,,date -embargo,Date that indicates when a dataset is released from embargo,,,,TRUE,DataProperty,,,date -standard_compliance,Has dataset passed standard validation checks,"TRUE, FALSE",,,TRUE,DataProperty,,,str -data_portal,"Has dataset been published to data portal +dataset_size,GB of data in a dataset,,,,TRUE,DataProperty,,,int +scheduled_release_date,Date that a dataset is scheduled for release,,,,TRUE,DataProperty,,,date +release_date,Date when a given dataset has actually been released,,,,TRUE,DataProperty,,,date +status,Data flow status of a dataset,"Not uploaded, uploaded, curated, quarantine, preprocessing, scheduled for release, ready for release, released",,,TRUE,DataProperty,,,str +released_destinations,Location that a dataset has been released to,"Not applicable, data portal, dbGaP",,,TRUE,DataProperty,,,str +released,"Whether or not a dataset has been released ","TRUE, FALSE",,,TRUE,DataProperty,,,str -released,"Has this dataset been released -","TRUE, FALSE",,,TRUE,DataProperty,,,str \ No newline at end of file +metadata_check,Whether or not a dataset has passed DCA/Schematic metadata checks,"TRUE, FALSE",,,TRUE,DataProperty,,,str +governance_compliance,Whether or not all files in a manifest have passed governance requirements,"TRUE, FALSE",,,TRUE,DataProperty,,,str \ No newline at end of file diff --git a/inst/data_model/dataflow_component.jsonld b/inst/data_model/dataflow_component.jsonld index 9126097..1c32498 100644 --- a/inst/data_model/dataflow_component.jsonld +++ b/inst/data_model/dataflow_component.jsonld @@ -1995,31 +1995,43 @@ "@id": "bts:Contributor" }, { - "@id": "bts:EntityId" + "@id": "bts:DatasetId" }, { "@id": "bts:DatasetName" }, { - "@id": "bts:Dataset" + "@id": "bts:DatasetType" + }, + { + "@id": "bts:UploadDate" }, { "@id": "bts:NumItems" }, { - "@id": "bts:ReleaseScheduled" + "@id": "bts:DatasetSize" + }, + { + "@id": "bts:ScheduledReleaseDate" }, { - "@id": "bts:Embargo" + "@id": "bts:ReleaseDate" }, { - "@id": "bts:StandardCompliance" + "@id": "bts:Status" }, { - "@id": "bts:DataPortal" + "@id": "bts:ReleasedDestinations" }, { "@id": "bts:Released" + }, + { + "@id": "bts:MetadataCheck" + }, + { + "@id": "bts:GovernanceCompliance" } ], "sms:validationRules": [] @@ -2027,7 +2039,7 @@ { "@id": "bts:Contributor", "@type": "rdfs:Class", - "rdfs:comment": "Storage project that dataset is contained in", + "rdfs:comment": "The team of a contributing user. The team reflects the contributing institution in standard DCC setups).  Note that for DCCs picking option 1 in the standard DCC setup, the Synapse project name also reflects the contributing institutions and can be used instead of the team name.", "rdfs:label": "Contributor", "rdfs:subClassOf": [ { @@ -2044,10 +2056,10 @@ ] }, { - "@id": "bts:EntityId", + "@id": "bts:DatasetId", "@type": "rdfs:Class", - "rdfs:comment": "Synapse unique ID", - "rdfs:label": "EntityId", + "rdfs:comment": "Synapse ID of manifest folder", + "rdfs:label": "DatasetId", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2056,7 +2068,7 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "sms:displayName": "entityId", + "sms:displayName": "dataset_id", "sms:required": "sms:true", "sms:validationRules": [ "str" @@ -2065,7 +2077,7 @@ { "@id": "bts:DatasetName", "@type": "rdfs:Class", - "rdfs:comment": "Unique dataset name", + "rdfs:comment": "Name of dataset", "rdfs:label": "DatasetName", "rdfs:subClassOf": [ { @@ -2082,10 +2094,10 @@ ] }, { - "@id": "bts:Dataset", + "@id": "bts:DatasetType", "@type": "rdfs:Class", "rdfs:comment": "Type of dataset", - "rdfs:label": "Dataset", + "rdfs:label": "DatasetType", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2094,12 +2106,31 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "sms:displayName": "dataset", + "sms:displayName": "dataset_type", "sms:required": "sms:true", "sms:validationRules": [ "str" ] }, + { + "@id": "bts:UploadDate", + "@type": "rdfs:Class", + "rdfs:comment": "Date when both data records and data files (if applicable) have been uploaded", + "rdfs:label": "UploadDate", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "upload_date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, { "@id": "bts:NumItems", "@type": "rdfs:Class", @@ -2120,10 +2151,29 @@ ] }, { - "@id": "bts:ReleaseScheduled", + "@id": "bts:DatasetSize", + "@type": "rdfs:Class", + "rdfs:comment": "GB of data in a dataset", + "rdfs:label": "DatasetSize", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "dataset_size", + "sms:required": "sms:true", + "sms:validationRules": [ + "int" + ] + }, + { + "@id": "bts:ScheduledReleaseDate", "@type": "rdfs:Class", "rdfs:comment": "Date that a dataset is scheduled for release", - "rdfs:label": "ReleaseScheduled", + "rdfs:label": "ScheduledReleaseDate", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2132,17 +2182,17 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "sms:displayName": "release_scheduled", + "sms:displayName": "scheduled_release_date", "sms:required": "sms:true", "sms:validationRules": [ "date" ] }, { - "@id": "bts:Embargo", + "@id": "bts:ReleaseDate", "@type": "rdfs:Class", - "rdfs:comment": "Date that indicates when a dataset is released from embargo", - "rdfs:label": "Embargo", + "rdfs:comment": "Date when a given dataset has actually been released", + "rdfs:label": "ReleaseDate", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2151,17 +2201,92 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, - "sms:displayName": "embargo", + "sms:displayName": "release_date", "sms:required": "sms:true", "sms:validationRules": [ "date" ] }, { - "@id": "bts:StandardCompliance", + "@id": "bts:Status", "@type": "rdfs:Class", - "rdfs:comment": "Has dataset passed standard validation checks", - "rdfs:label": "StandardCompliance", + "rdfs:comment": "Data flow status of a dataset", + "rdfs:label": "Status", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Notuploaded" + }, + { + "@id": "bts:Uploaded" + }, + { + "@id": "bts:Curated" + }, + { + "@id": "bts:Quarantine" + }, + { + "@id": "bts:Preprocessing" + }, + { + "@id": "bts:Scheduledforrelease" + }, + { + "@id": "bts:Readyforrelease" + }, + { + "@id": "bts:Released" + } + ], + "sms:displayName": "status", + "sms:required": "sms:true", + "sms:validationRules": [ + "str" + ] + }, + { + "@id": "bts:ReleasedDestinations", + "@type": "rdfs:Class", + "rdfs:comment": "Location that a dataset has been released to", + "rdfs:label": "ReleasedDestinations", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Notapplicable" + }, + { + "@id": "bts:Dataportal" + }, + { + "@id": "bts:DbGaP" + } + ], + "sms:displayName": "released_destinations", + "sms:required": "sms:true", + "sms:validationRules": [ + "str" + ] + }, + { + "@id": "bts:Released", + "@type": "rdfs:Class", + "rdfs:comment": "Whether or not a dataset has been released\n", + "rdfs:label": "Released", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2178,17 +2303,17 @@ "@id": "bts:FALSE" } ], - "sms:displayName": "standard_compliance", + "sms:displayName": "released", "sms:required": "sms:true", "sms:validationRules": [ "str" ] }, { - "@id": "bts:DataPortal", + "@id": "bts:MetadataCheck", "@type": "rdfs:Class", - "rdfs:comment": "Has dataset been published to data portal\n", - "rdfs:label": "DataPortal", + "rdfs:comment": "Whether or not a dataset has passed DCA/Schematic metadata checks", + "rdfs:label": "MetadataCheck", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2205,17 +2330,17 @@ "@id": "bts:FALSE" } ], - "sms:displayName": "data_portal", + "sms:displayName": "metadata_check", "sms:required": "sms:true", "sms:validationRules": [ "str" ] }, { - "@id": "bts:Released", + "@id": "bts:GovernanceCompliance", "@type": "rdfs:Class", - "rdfs:comment": "Has this dataset been released\n", - "rdfs:label": "Released", + "rdfs:comment": "Whether or not all files in a manifest have passed governance requirements", + "rdfs:label": "GovernanceCompliance", "rdfs:subClassOf": [ { "@id": "bts:DataProperty" @@ -2232,7 +2357,7 @@ "@id": "bts:FALSE" } ], - "sms:displayName": "released", + "sms:displayName": "governance_compliance", "sms:required": "sms:true", "sms:validationRules": [ "str" @@ -2255,6 +2380,176 @@ "sms:required": "sms:false", "sms:validationRules": [] }, + { + "@id": "bts:Notuploaded", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Notuploaded", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Not uploaded", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Uploaded", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Uploaded", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "uploaded", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Curated", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Curated", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "curated", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Quarantine", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Quarantine", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "quarantine", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Preprocessing", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Preprocessing", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "preprocessing", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Scheduledforrelease", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Scheduledforrelease", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "scheduled for release", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Readyforrelease", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Readyforrelease", + "rdfs:subClassOf": [ + { + "@id": "bts:Status" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ready for release", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Notapplicable", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Notapplicable", + "rdfs:subClassOf": [ + { + "@id": "bts:ReleasedDestinations" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Not applicable", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Dataportal", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Dataportal", + "rdfs:subClassOf": [ + { + "@id": "bts:ReleasedDestinations" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "data portal", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:DbGaP", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DbGaP", + "rdfs:subClassOf": [ + { + "@id": "bts:ReleasedDestinations" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "dbGaP", + "sms:required": "sms:false", + "sms:validationRules": [] + }, { "@id": "bts:TRUE", "@type": "rdfs:Class", @@ -2262,7 +2557,7 @@ "rdfs:label": "TRUE", "rdfs:subClassOf": [ { - "@id": "bts:StandardCompliance" + "@id": "bts:Released" } ], "schema:isPartOf": { @@ -2279,7 +2574,7 @@ "rdfs:label": "FALSE", "rdfs:subClassOf": [ { - "@id": "bts:StandardCompliance" + "@id": "bts:Released" } ], "schema:isPartOf": { diff --git a/renv.lock b/renv.lock index e2a81f4..1b4bc7f 100644 --- a/renv.lock +++ b/renv.lock @@ -422,8 +422,8 @@ "RemoteHost": "api.github.com", "RemoteUsername": "Sage-Bionetworks", "RemoteRepo": "dfamodules", - "RemoteRef": "dev-select-dcc", - "RemoteSha": "6408158f9d10faa9768ea3fa430c16c2596c765a", + "RemoteRef": "dev-mt", + "RemoteSha": "9b2970726da9c05bd2387a344e3711b11530c3e3", "Requirements": [ "DT", "dplyr", @@ -440,7 +440,7 @@ "shinyjs", "waiter" ], - "Hash": "9a0115bf1fcb2628675cfe28068ff019" + "Hash": "687267390464e19c546ab3371c383fff" }, "diffobj": { "Package": "diffobj",