From afc89fbb5701c098b9b9946606b1265eba733704 Mon Sep 17 00:00:00 2001 From: RobbinBouwmeester Date: Mon, 18 Sep 2023 16:44:36 +0200 Subject: [PATCH] Fix modifications on peptides that are too long --- CHANGELOG.md | 4 ++++ deeplc/feat_extractor.py | 28 ++++++++++++++++++---------- setup.py | 2 +- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f70fd14..df45379 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +# [2.2.17] - 2023-09-18 + +- Fix peptides that are too long and their modifications + # [2.2.16] - 2023-09-18 - bump version diff --git a/deeplc/feat_extractor.py b/deeplc/feat_extractor.py index e32ab3e..6a6cb93 100644 --- a/deeplc/feat_extractor.py +++ b/deeplc/feat_extractor.py @@ -444,7 +444,7 @@ def rolling_sum(a, n=2): if seq_len > padding_length: seq = seq[0:padding_length] seq_len = len(seq) - logger.debug( + logger.warning( "Truncating peptide (too long): %s" % (seq)) peptide_composition = [mass.std_aa_comp[aa] for aa in seq] @@ -462,8 +462,8 @@ def rolling_sum(a, n=2): for k, v in position_composition.items(): try: matrix[i, dict_index[k]] = v - except KeyError: - logger.debug(f"Could not add the following value: {v}") + except warning: + logger.warning(f"Could not add the following value: {v}") for p in positions_pos: aa = seq[p] @@ -471,7 +471,9 @@ def rolling_sum(a, n=2): try: matrix_pos[p, dict_index_pos[atom]] = val except KeyError: - logger.debug(f"Could not add the following atom: {atom}") + logger.warning(f"Could not add the following atom: {atom}") + except IndexError: + logger.warning(f"Could not add the following atom: {p} {atom} {val}") for pn in positions_neg: aa = seq[seq_len + pn] @@ -479,22 +481,25 @@ def rolling_sum(a, n=2): try: matrix_pos[pn, dict_index_pos[atom]] = val except KeyError: - 
logger.debug(f"Could not add the following atom: {atom}") + logger.warning(f"Could not add the following atom: {atom}") + except IndexError: + logger.warning(f"Could not add the following atom: {pn} {atom} {val}") for i, peptide_position in enumerate(peptidoform.parsed_sequence): try: matrix_hc[i, dict_aa[peptide_position[0]]] = 1. except KeyError: - pass + logger.warning( + "Skipping the following (not in library): ", peptide_position[1]) except IndexError: # Likely to be a sequence > 60 AA - pass + logger.warning(f"Could not add the following atom: {i} {peptide_position}") if peptide_position[1] is not None: try: modification_composition = peptide_position[1][0].composition except KeyError: - logger.debug( + logger.warning( "Skipping the following (not in library): ", peptide_position[1]) continue @@ -507,7 +512,7 @@ def rolling_sum(a, n=2): matrix_pos[i - seq_len, dict_index_pos[atom_position_composition]] += atom_change except KeyError: try: - logger.debug(f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part") + logger.warning(f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part") atom_position_composition = sub("\[.*?\]", "", atom_position_composition) matrix[i, dict_index[atom_position_composition]] += atom_change if i in positions: @@ -515,8 +520,11 @@ def rolling_sum(a, n=2): elif i - seq_len in positions: matrix_pos[i - seq_len, dict_index_pos[atom_position_composition]] += atom_change except KeyError: - logger.debug(f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored") + logger.warning(f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored") continue + except IndexError: + logger.warning(f"Could not add the following atom: {i} {atom_position_composition} {atom_change}") + matrix_all = np.sum(matrix, axis=0) matrix_all = np.append(matrix_all, seq_len) diff --git a/setup.py b/setup.py 
index cf72eaf..48859d5 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='deeplc', - version='2.2.16', + version='2.2.17', license='apache-2.0', description='DeepLC: Retention time prediction for (modified) peptides using Deep Learning.', long_description=LONG_DESCRIPTION,