Skip to content

Commit

Permalink
New index on jenkins-niaid (#2893)
Browse files Browse the repository at this point in the history
Co-authored-by: Hara Prasad <haraprasadj@uchicago.edu>
  • Loading branch information
atharvar28 and haraprasadj authored Jun 17, 2024
1 parent 7b9f594 commit 778f1c8
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 134 deletions.
197 changes: 152 additions & 45 deletions jenkins-niaid.planx-pla.net/etlMapping.yaml
Original file line number Diff line number Diff line change
@@ -1,56 +1,163 @@
mappings:
- name: jenkins-niaid.planx-pla.net_clinical_trials
doc_type: clinical_trials
- name: jenkins-niaid.planx-pla.net_subject
doc_type: subject
type: aggregator
root: core_metadata_collection
root: subject
props:
- name: creator
- name: nct_number
- name: condition
- name: category
- name: study_design_allocation
- name: study_design_intervention_model
- name: study_design_masking
- name: study_design_primary_purpose
- name: responsible_party
- name: study_start_date
- name: study_completion_date
- name: data_available
- name: data_availability_date
- name: data_available_for_request
- name: clinical_trial_website
- name: publications
- name: brief_summary
- name: description
- name: title
- name: project_id
- name: cmc_unique_id
src: submitter_id
- name: jenkins-niaid.planx-pla.net_ctfile
doc_type: ctfile
type: aggregator
root: clinical_trial_file
props:
- name: object_id
- name: md5sum
- name: file_name
- name: file_size
- name: data_format
- name: data_type
- name: submitter_id
- name: project_id
- name: sex
- name: age_value
- name: ancestry
- name: disease_description
- name: phenotype_present
- name: phenotype_absent
- name: disease_id
- name: solve_state
- name: congenital_status
- name: age_of_onset
- name: phenotype_group
- name: anvil_project_id
# flatten_props:
aggregated_props:
- name: sample_provider
path: samples
src: sample_provider
fn: set
- name: tissue_affected_status
path: samples
src: tissue_affected_status
fn: set
- name: tissue_type
path: samples
src: tissue_type
fn: set
- name: sample_type
path: samples
src: sample_type
fn: set
- name: original_material_type
path: samples
src: original_material_type
fn: set
- name: exome_capture_platform
path: samples.sequencings
src: exome_capture_platform
fn: set
- name: library_prep_kit_method
path: samples.sequencings
src: library_prep_kit_method
fn: set
- name: capture_region_bed_file
path: samples.sequencings
src: capture_region_bed_file
fn: set
- name: reference_genome_build
path: samples.sequencings
src: reference_genome_build
fn: set
- name: sequencing_assay
path: samples.sequencings
src: sequencing_assay
fn: set
- name: alignment_method
path: samples.sequencings
src: alignment_method
fn: set
- name: data_processing_pipeline
path: samples.sequencings
src: data_processing_pipeline
fn: set
- name: _samples_count
path: samples
fn: count
- name: _sequencings_count
path: samples.sequencings
fn: count
joining_props:
- index: file
join_on: _subject_id
props:
- name: data_format
src: data_format
fn: set
- name: data_type
src: data_type
fn: set
- name: data_category
src: data_category
fn: set
- name: file_count
src: _file_id
fn: count
parent_props:
- path: core_metadata_collections[cmc_unique_id:submitter_id]
- name: jenkins-niaid.planx-pla.net_oafile
doc_type: oafile
type: aggregator
root: open_access_doc
- path: projects[project_code:code]
- path: projects[project_name:name]
- path: projects[project_dbgap_phs:dbgap_phs]
- path: projects[project_dbgap_consent_text:dbgap_consent_text]
- path: projects[project_short_name:short_name]
- path: projects[project_dbgap_accession_number:dbgap_accession_number]
- name: jenkins-niaid.planx-pla.net_file
doc_type: file
type: collector
root: None
category: data_file
props:
- name: project_id
- name: submitter_id
- name: object_id
- name: md5sum
- name: file_name
- name: file_size
- name: data_format
- name: data_type
- name: doc_url
- name: project_id
parent_props:
- path: core_metadata_collections[cmc_unique_id:submitter_id]
- name: state
- name: data_category
- name: analyte_type
- name: sequencing_assay
- name: source_node
injecting_props:
subject:
props:
- name: _subject_id
src: id
fn: set
- name: subject_submitter_id
src: submitter_id
fn: set
- name: anvil_project_id
src: anvil_project_id
fn: set
- name: sex
src: sex
fn: set
- name: age_value
src: age_value
fn: set
- name: ancestry
src: ancestry
fn: set
- name: disease_description
src: disease_description
fn: set
- name: phenotype_present
src: phenotype_present
fn: set
- name: phenotype_absent
src: phenotype_absent
fn: set
- name: disease_id
src: disease_id
fn: set
- name: solve_state
src: solve_state
fn: set
- name: congenital_status
src: congenital_status
fn: set
- name: age_of_onset
src: age_of_onset
fn: set
- name: phenotype_group
src: phenotype_group
fn: set
4 changes: 2 additions & 2 deletions jenkins-niaid.planx-pla.net/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@
"environment": "qaplanetv1",
"hostname": "jenkins-niaid.planx-pla.net",
"revproxy_arn": "arn:aws:acm:us-east-1:707767160287:certificate/c676c81c-9546-4e9a-9a72-725dd3912bc8",
"dictionary_url": "https://s3.amazonaws.com/dictionary-artifacts/niaid-clinical-trials-dictionary/master/schema.json",
"dictionary_url": "https://s3.amazonaws.com/dictionary-artifacts/anvil/master/schema.json",
"portal_app": "gitops",
"kube_bucket": "kube-qaplanetv1-gen3",
"logs_bucket": "logs-qaplanetv1-gen3",
Expand All @@ -301,7 +301,7 @@
"guppy": {
"indices": [
{
"index": "jenkins_subject_alias",
"index": "jenkins_subject_new_alias",
"type": "subject"
},
{
Expand Down
Loading

0 comments on commit 778f1c8

Please sign in to comment.