From 67df936af73222c9eaae24a353d1b29584872a25 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 10 Jun 2024 12:38:02 +0200 Subject: [PATCH 1/5] fix args bug --- scripts/generate_anglerfish_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_anglerfish_samplesheet.py b/scripts/generate_anglerfish_samplesheet.py index 582a38a3..ed0da6ac 100644 --- a/scripts/generate_anglerfish_samplesheet.py +++ b/scripts/generate_anglerfish_samplesheet.py @@ -181,7 +181,7 @@ def main(): logging.info(f"Script called with arguments: \n\t{args_str}") try: - file_name = generate_anglerfish_samplesheet(process, args) + file_name = generate_anglerfish_samplesheet(process) logging.info("Uploading samplesheet to LIMS...") upload_file( From 1ba2155b7783fb3bb806af642fac0b6c2c13585e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 10 Jun 2024 12:38:29 +0200 Subject: [PATCH 2/5] use db query to link samples and indices --- scripts/generate_minknow_samplesheet.py | 90 ++++++++++++++++++++----- 1 file changed, 75 insertions(+), 15 deletions(-) diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index f2d58a7c..875cad4e 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -9,6 +9,8 @@ from datetime import datetime as dt import pandas as pd +import psycopg2 +import yaml from genologics.config import BASEURI, PASSWORD, USERNAME from genologics.entities import Artifact, Process from genologics.lims import Lims @@ -24,6 +26,9 @@ TIMESTAMP = dt.now().strftime("%y%m%d_%H%M%S") SCRIPT_NAME: str = os.path.basename(__file__).split(".")[0] +with open("/opt/gls/clarity/users/glsai/config/genosqlrc.yaml") as f: + config = yaml.safe_load(f) + def get_ont_library_contents( ont_library: Artifact, @@ -38,10 +43,15 @@ def get_ont_library_contents( """ + # Link ONT barcode well to ONT barcode ont_barcode_well2label = {} for ont_barcode_dict in ONT_BARCODES: ont_barcode_well2label[ont_barcode_dict["well"]] = ont_barcode_dict["label"] + # Link samples to reagent_labels, if applicable + if len(ont_library.reagent_labels) > 1: + sample2label = get_pool_sample_label_mapping(ont_library) + logging.info( f"Compiling sample-level information for library '{ont_library.name}'..." ) @@ -87,17 +97,15 @@ def get_ont_library_contents( library_contents_msg += f"\n\t - '{ont_pooling_input.name}': Illumina indexed pool with ONT-barcode '{ont_barcode}'" - for sample, illumina_index in zip( - ont_pooling_input.samples, ont_pooling_input.reagent_labels - ): - library_contents_msg += f"\n\t\t - '{sample.name}': Illumina sample with index '{illumina_index}'." + for sample in ont_pooling_input.samples: + library_contents_msg += f"\n\t\t - '{sample.name}': Illumina sample with index '{sample2label[sample.name]}'." rows.append( { "sample_name": sample.name, "sample_id": sample.id, "project_name": sample.project.name, "project_id": sample.project.id, - "illumina_index": illumina_index, + "illumina_index": sample2label[sample.name], "illumina_pool_name": ont_pooling_input.name, "illumina_pool_id": ont_pooling_input.id, "ont_barcode": ont_barcode, @@ -114,17 +122,15 @@ def get_ont_library_contents( ), f"ONT-pooling input '{ont_pooling_input.name}' lacks any reagent labels. Mixing barcoded and non-barcoded samples is not allowed." # ONT barcode-level demultiplexing - for ont_sample, ont_barcode in zip( - ont_pooling_input.samples, ont_pooling_input.reagent_labels - ): - library_contents_msg += f"\n\t - '{ont_pooling_input.name}': ONT sample with barcode '{ont_barcode}'" + for ont_sample in ont_pooling_input.samples: + library_contents_msg += f"\n\t - '{ont_pooling_input.name}': ONT sample with barcode '{sample2label[sample.name]}'" rows.append( { "sample_name": ont_sample.name, "sample_id": ont_sample.id, "project_name": ont_sample.project.name, "project_id": ont_sample.project.id, - "ont_barcode": ont_barcode, + "ont_barcode": sample2label[sample.name], "ont_pool_name": ont_pooling_output.name, "ont_pool_id": ont_pooling_output.id, } @@ -143,17 +149,16 @@ def get_ont_library_contents( if len(ont_library.reagent_labels) > 0: # Remaining possibilities: # (3) Illumina-indexes only - for sample, illumina_index in zip( - ont_library.samples, ont_library.reagent_labels - ): - library_contents_msg += f"\n - '{sample.name}': Illumina sample with index '{illumina_index}'." + + for sample in ont_library.samples: + library_contents_msg += f"\n - '{sample.name}': Illumina sample with index '{sample2label[sample.name]}'." rows.append( { "sample_name": sample.name, "sample_id": sample.id, "project_name": sample.project.name, "project_id": sample.project.id, - "illumina_index": illumina_index, + "illumina_index": sample2label[sample.name], "illumina_pool_name": ont_library.name, "illumina_pool_id": ont_library.id, } @@ -188,6 +193,61 @@ def get_ont_library_contents( return df +def get_pool_sample_label_mapping(pool: Artifact) -> dict[str, str]: + # Setup DB connection + connection = psycopg2.connect( + user=config["username"], + host=config["url"], + database=config["db"], + password=config["password"], + ) + cursor = connection.cursor() + + # Find all reagent labels linked to 'analyte' type artifacts matching the given name + query = """ + select + distinct( rl.name ) + from + reagentlabel rl, + artifact art, + artifact_label_map alm + where + rl.labelid = alm.labelid + and art.artifactid = alm.artifactid + and art.artifacttypeid = 2 + and art.name = '{}'; + """ + + errors = False + sample2label = {} + for sample in pool.samples: + try: + cursor.execute(query.format(sample.name)) + query_results = cursor.fetchall() + + assert ( + len(query_results) != 0 + ), f"No reagent labels found for sample '{sample.name}'." + assert ( + len(query_results) == 1 + ), f"Multiple reagent labels found for sample '{sample.name}'." + + label = query_results[0][0] + sample2label[sample.name] = label + except AssertionError as e: + logging.error(str(e), exc_info=True) + logging.warning(f"Skipping sample '{sample.name}' due to error.") + errors = True + continue + + if errors: + raise AssertionError( + "Errors occurred when linking samples and indices. Please report this error." + ) + else: + return sample2label + + def get_kit_string(process: Process) -> str: """Combine prep kit and expansion kit UDFs (if any) into space-separated string""" prep_kit = process.udf.get("ONT prep kit") From ba3f1a9adb67156ed45da378c37420b5917d2d8e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 10 Jun 2024 12:49:43 +0200 Subject: [PATCH 3/5] grab run name from artifact, not process --- scripts/generate_anglerfish_samplesheet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_anglerfish_samplesheet.py b/scripts/generate_anglerfish_samplesheet.py index ed0da6ac..bac0491b 100644 --- a/scripts/generate_anglerfish_samplesheet.py +++ b/scripts/generate_anglerfish_samplesheet.py @@ -67,7 +67,7 @@ def generate_anglerfish_samplesheet(process): df_anglerfish = df[["sample_name", "adaptor_type", "index_seq", "fastq_path"]] # Get run name from LIMS field - run_name = process.udf.get("ONT run name") + run_name = ont_library.udf.get("ONT run name") file_name = f"anglerfish_samplesheet_{run_name}_{TIMESTAMP}.csv" df_anglerfish.to_csv( From 40e37acff421d06409e4ea9dac29e44e77b8e78b Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 10 Jun 2024 16:26:31 +0200 Subject: [PATCH 4/5] bump docs --- scripts/generate_minknow_samplesheet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_minknow_samplesheet.py b/scripts/generate_minknow_samplesheet.py index 875cad4e..25122426 100644 --- a/scripts/generate_minknow_samplesheet.py +++ b/scripts/generate_minknow_samplesheet.py @@ -37,7 +37,7 @@ def get_ont_library_contents( ) -> pd.DataFrame: """For an ONT sequencing library, compile a dataframe with sample-level information. - Will backtrack the library to previous pooling step (if any) to elucidate + Will backtrack the library to previous ONT pooling step (if any) to elucidate sample and index information and decide whether to demultiplex at the level of ONT barcodes, Illumina indices, both or neither. @@ -48,7 +48,7 @@ def get_ont_library_contents( for ont_barcode_dict in ONT_BARCODES: ont_barcode_well2label[ont_barcode_dict["well"]] = ont_barcode_dict["label"] - # Link samples to reagent_labels, if applicable + # Link samples to reagent_labels via database queries, if applicable if len(ont_library.reagent_labels) > 1: sample2label = get_pool_sample_label_mapping(ont_library) From f0a9db9b6fb5a02423b38e4015f52f7b43b134f3 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Mon, 10 Jun 2024 16:27:29 +0200 Subject: [PATCH 5/5] bump vlog --- VERSIONLOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 26aa6a61..d465f1fc 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # Scilifelab_epps Version Log +## 20240610.1 + +When parsing ONT sequencing libraries, use database queries to link pool samples to their respective labels. + ## 20240530.1 Support VC100 in logbook