From 778f1c894232ab3b7c56185138f2704abc92f9b6 Mon Sep 17 00:00:00 2001 From: Atharva Rane <41084525+atharvar28@users.noreply.github.com> Date: Mon, 17 Jun 2024 17:58:26 -0400 Subject: [PATCH] New index on jenkins-niaid (#2893) Co-authored-by: Hara Prasad --- jenkins-niaid.planx-pla.net/etlMapping.yaml | 197 +++++++++++++---- jenkins-niaid.planx-pla.net/manifest.json | 4 +- .../portal/gitops.json | 200 ++++++++++-------- 3 files changed, 267 insertions(+), 134 deletions(-) diff --git a/jenkins-niaid.planx-pla.net/etlMapping.yaml b/jenkins-niaid.planx-pla.net/etlMapping.yaml index 9b3d75e26..b2f08f641 100644 --- a/jenkins-niaid.planx-pla.net/etlMapping.yaml +++ b/jenkins-niaid.planx-pla.net/etlMapping.yaml @@ -1,56 +1,163 @@ mappings: - - name: jenkins-niaid.planx-pla.net_clinical_trials - doc_type: clinical_trials + - name: jenkins-niaid.planx-pla.net_subject + doc_type: subject type: aggregator - root: core_metadata_collection + root: subject props: - - name: creator - - name: nct_number - - name: condition - - name: category - - name: study_design_allocation - - name: study_design_intervention_model - - name: study_design_masking - - name: study_design_primary_purpose - - name: responsible_party - - name: study_start_date - - name: study_completion_date - - name: data_available - - name: data_availability_date - - name: data_available_for_request - - name: clinical_trial_website - - name: publications - - name: brief_summary - - name: description - - name: title - - name: project_id - - name: cmc_unique_id - src: submitter_id - - name: jenkins-niaid.planx-pla.net_ctfile - doc_type: ctfile - type: aggregator - root: clinical_trial_file - props: - - name: object_id - - name: md5sum - - name: file_name - - name: file_size - - name: data_format - - name: data_type + - name: submitter_id - name: project_id + - name: sex + - name: age_value + - name: ancestry + - name: disease_description + - name: phenotype_present + - name: phenotype_absent + - name: disease_id + - name: solve_state + - name: congenital_status + - name: age_of_onset + - name: phenotype_group + - name: anvil_project_id +# flatten_props: + aggregated_props: + - name: sample_provider + path: samples + src: sample_provider + fn: set + - name: tissue_affected_status + path: samples + src: tissue_affected_status + fn: set + - name: tissue_type + path: samples + src: tissue_type + fn: set + - name: sample_type + path: samples + src: sample_type + fn: set + - name: original_material_type + path: samples + src: original_material_type + fn: set + - name: exome_capture_platform + path: samples.sequencings + src: exome_capture_platform + fn: set + - name: library_prep_kit_method + path: samples.sequencings + src: library_prep_kit_method + fn: set + - name: capture_region_bed_file + path: samples.sequencings + src: capture_region_bed_file + fn: set + - name: reference_genome_build + path: samples.sequencings + src: reference_genome_build + fn: set + - name: sequencing_assay + path: samples.sequencings + src: sequencing_assay + fn: set + - name: alignment_method + path: samples.sequencings + src: alignment_method + fn: set + - name: data_processing_pipeline + path: samples.sequencings + src: data_processing_pipeline + fn: set + - name: _samples_count + path: samples + fn: count + - name: _sequencings_count + path: samples.sequencings + fn: count + joining_props: + - index: file + join_on: _subject_id + props: + - name: data_format + src: data_format + fn: set + - name: data_type + src: data_type + fn: set + - name: data_category + src: data_category + fn: set + - name: file_count + src: _file_id + fn: count parent_props: - - path: core_metadata_collections[cmc_unique_id:submitter_id] - - name: jenkins-niaid.planx-pla.net_oafile - doc_type: oafile - type: aggregator - root: open_access_doc + - path: projects[project_code:code] + - path: projects[project_name:name] + - path: projects[project_dbgap_phs:dbgap_phs] + - path: projects[project_dbgap_consent_text:dbgap_consent_text] + - path: projects[project_short_name:short_name] + - path: projects[project_dbgap_accession_number:dbgap_accession_number] + - name: jenkins-niaid.planx-pla.net_file + doc_type: file + type: collector + root: None + category: data_file props: + - name: project_id + - name: submitter_id + - name: object_id - name: md5sum - name: file_name - name: file_size - name: data_format - name: data_type - - name: doc_url - - name: project_id - parent_props: - - path: core_metadata_collections[cmc_unique_id:submitter_id] + - name: state + - name: data_category + - name: analyte_type + - name: sequencing_assay + - name: source_node + injecting_props: + subject: + props: + - name: _subject_id + src: id + fn: set + - name: subject_submitter_id + src: submitter_id + fn: set + - name: anvil_project_id + src: anvil_project_id + fn: set + - name: sex + src: sex + fn: set + - name: age_value + src: age_value + fn: set + - name: ancestry + src: ancestry + fn: set + - name: disease_description + src: disease_description + fn: set + - name: phenotype_present + src: phenotype_present + fn: set + - name: phenotype_absent + src: phenotype_absent + fn: set + - name: disease_id + src: disease_id + fn: set + - name: solve_state + src: solve_state + fn: set + - name: congenital_status + src: congenital_status + fn: set + - name: age_of_onset + src: age_of_onset + fn: set + - name: phenotype_group + src: phenotype_group + fn: set \ No newline at end of file diff --git a/jenkins-niaid.planx-pla.net/manifest.json b/jenkins-niaid.planx-pla.net/manifest.json index a6ca0514e..65beffb21 100644 --- a/jenkins-niaid.planx-pla.net/manifest.json +++ b/jenkins-niaid.planx-pla.net/manifest.json @@ -280,7 +280,7 @@ "environment": "qaplanetv1", "hostname": "jenkins-niaid.planx-pla.net", "revproxy_arn": "arn:aws:acm:us-east-1:707767160287:certificate/c676c81c-9546-4e9a-9a72-725dd3912bc8", - "dictionary_url": "https://s3.amazonaws.com/dictionary-artifacts/niaid-clinical-trials-dictionary/master/schema.json", + "dictionary_url": "https://s3.amazonaws.com/dictionary-artifacts/anvil/master/schema.json", "portal_app": "gitops", "kube_bucket": "kube-qaplanetv1-gen3", "logs_bucket": "logs-qaplanetv1-gen3", @@ -301,7 +301,7 @@ "guppy": { "indices": [ { - "index": "jenkins_subject_alias", + "index": "jenkins_subject_new_alias", "type": "subject" }, { diff --git a/jenkins-niaid.planx-pla.net/portal/gitops.json b/jenkins-niaid.planx-pla.net/portal/gitops.json index 0a5866657..a45c6453c 100644 --- a/jenkins-niaid.planx-pla.net/portal/gitops.json +++ b/jenkins-niaid.planx-pla.net/portal/gitops.json @@ -494,22 +494,19 @@ }, "studyViewerConfig": [ { - "dataType": "clinical_trials", + "dataType": "subject", "title": "Studies", - "titleField": "title", - "rowAccessor": "cmc_unique_id", + "titleField": "project_id", + "rowAccessor": "submitter_id", "listItemConfig": { - "blockFields": ["brief_summary"], - "tableFields": ["data_availability_date", "data_available", "creator", "nct_number", "condition", "category", "clinical_trial_website", "publications"] + "blockFields": ["project_id"], + "tableFields": ["data_type"] }, "fieldMapping": [ - { "field": "brief_summary", "name": "Brief Study Description" }, - { "field": "description", "name": "Detailed Description"}, - { "field": "creator", "name": "Sponsor"}, { "field": "category", "name": "Study Type"}, - { "field": "clinical_trial_website", "name": "Websites"}, - { "field": "nct_number", "name": "NCT Number"}, - { "field": "publications", "name": "Study Publications"} + { "field": "project_id", "name": "Project_ID"}, + { "field": "data_type", "name": "Data_Type"}, + { "field": "condition", "name": "Condition"} ], "openMode": "close-all", "buttons": [ @@ -527,13 +524,18 @@ ], "useArboristUI": true, "dataExplorerConfig": { + "charts": { + "project_id": { + "chartType": "count", + "title": "Projects" + } + }, "filters": { "tabs": [ { "title": "Files", "fields": [ "project_id", - "file_size", "data_format", "data_type" ] @@ -544,22 +546,33 @@ "enabled": true, "fields": [ "project_id", - "file_name", - "file_size", "data_format" ] }, + "dropdowns": { + "download": { + "title": "Download" + } + }, "guppyConfig": { - "dataType": "ctfile", - "nodeCountTitle": "Files", + "dataType": "subject", + "nodeCountTitle": "Subjects", "fieldMapping": [ + { + "field": "_subject_id", + "name": "Subject ID" + } ], "manifestMapping": { - "resourceIndexType": "ctfile", + "resourceIndexType": "file", "resourceIdField": "object_id", - "referenceIdFieldInResourceIndex": "project_id", - "referenceIdFieldInDataIndex": "project_id" - } + "referenceIdFieldInResourceIndex": "_subject_id", + "referenceIdFieldInDataIndex": "_subject_id" + }, + "accessibleFieldCheckList": [ + "project_id" + ], + "accessibleValidationField": "project_id" }, "buttons": [ { @@ -578,89 +591,102 @@ } ] }, - "arrangerConfig": { + "fileExplorerConfig": { "charts": { - "project_id": { - "chartType": "count", - "title": "Projects" - }, - "submitter_id": { - "chartType": "count", - "title": "Subjects" + "data_type": { + "chartType": "stackedBar", + "title": "File Type" }, - "gender": { - "chartType": "pie", - "title": "Gender" - }, - "race": { - "chartType": "bar", - "title": "Race" - }, - "ethnicity": { - "chartType": "bar", - "title": "Ethnicity" + "data_format": { + "chartType": "stackedBar", + "title": "File Format" } }, "filters": { "tabs": [ { - "title": "Subject", - "fields":[ - "project_id", - "gender", - "race", - "ethnicity", - "vital_status", - "frstdthd" - ] - }, { - "title": "Diagnosis", + "title": "File", "fields": [ - "arthxbase", - "bshbvstat", - "bshcvstat", - "cd4nadir", - "status", - "virus_type", - "virus_subtype" + "project_id", + "data_category", + "data_type", + "data_format" + ], + "searchFields": [ + "file_name" ] } ] }, - "manifestMapping": { - "resourceIndexType": "file", - "resourceIdField": "object_id", - "referenceIdFieldInResourceIndex": "subject_id", - "referenceIdFieldInDataIndex": "node_id" - }, "table": { - "buttons": [ + "enabled": true, + "fields": [ + "project_id", + "file_name", + "file_size", + "object_id" ] + }, + "guppyConfig": { + "dataType": "file", + "fieldMapping": [ { - "enabled": true, - "type": "data", - "title": "Download Clinical", - "leftIcon": "user", - "rightIcon": "download", - "fileName": "clinical.json" + "field": "object_id", + "name": "GUID" }, { - "enabled": true, - "type": "manifest", - "title": "Download Manifest", - "leftIcon": "datafile", - "rightIcon": "download", - "fileName": "manifest.json" - },{ - "enabled": false, - "type": "export-to-workspace", - "title": "Export to Workspace", - "leftIcon": "datafile", - "rightIcon": "download" - } - ] + "field": "programs_name", + "name": "Program" + } + ], + "nodeCountTitle": "Files", + "manifestMapping": { + "resourceIndexType": "subject", + "resourceIdField": "_subject_id", + "referenceIdFieldInResourceIndex": "object_id", + "referenceIdFieldInDataIndex": "object_id" + }, + "accessibleFieldCheckList": [ + "project_id" + ], + "accessibleValidationField": "project_id", + "downloadAccessor": "object_id" }, - "projectId": "search", - "graphqlField": "subject", - "index": "" + "buttons": [ + { + "enabled": true, + "type": "file-manifest", + "title": "Download Manifest", + "leftIcon": "datafile", + "rightIcon": "download", + "fileName": "file-manifest.json", + "dropdownId": "download" + }, + { + "enabled": true, + "type": "export-files-to-workspace", + "title": "Export to Workspace", + "leftIcon": "datafile", + "rightIcon": "download" + }, + { + "enabled": true, + "type": "export-files-to-pfb", + "title": "Export All to PFB", + "rightIcon": "external-link" + }, + { + "enabled": true, + "type": "export-files", + "title": "Export All to Terra", + "rightIcon": "external-link" + }, + { + "enabled": true, + "type": "export-files-to-seven-bridges", + "title": "Export All to Seven Bridges", + "rightIcon": "external-link" + } + ], + "dropdowns": {} } }