Skip to content

Commit

Permalink
Fix modifications on peptides that are too long
Browse files Browse the repository at this point in the history
  • Loading branch information
RobbinBouwmeester committed Sep 18, 2023
1 parent 0370e29 commit afc89fb
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).

# [2.2.17] - 2023-09-18

- Fix peptides that are too long and their modifications

# [2.2.16] - 2023-09-18

- bump version
Expand Down
28 changes: 18 additions & 10 deletions deeplc/feat_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def rolling_sum(a, n=2):
if seq_len > padding_length:
seq = seq[0:padding_length]
seq_len = len(seq)
logger.debug(
logger.warning(
"Truncating peptide (too long): %s" % (seq))

peptide_composition = [mass.std_aa_comp[aa] for aa in seq]
Expand All @@ -462,39 +462,44 @@ def rolling_sum(a, n=2):
for k, v in position_composition.items():
try:
matrix[i, dict_index[k]] = v
except KeyError:
logger.debug(f"Could not add the following value: {v}")
except warning:
logger.warning(f"Could not add the following value: {v}")

for p in positions_pos:
aa = seq[p]
for atom, val in mass.std_aa_comp[aa].items():
try:
matrix_pos[p, dict_index_pos[atom]] = val
except KeyError:
logger.debug(f"Could not add the following atom: {atom}")
logger.warning(f"Could not add the following atom: {atom}")
except IndexError:
logger.warning(f"Could not add the following atom: {p} {atom} {val}")

for pn in positions_neg:
aa = seq[seq_len + pn]
for atom, val in mass.std_aa_comp[aa].items():
try:
matrix_pos[pn, dict_index_pos[atom]] = val
except KeyError:
logger.debug(f"Could not add the following atom: {atom}")
logger.warning(f"Could not add the following atom: {atom}")
except IndexError:
logger.warning(f"Could not add the following atom: {pn} {atom} {val}")

for i, peptide_position in enumerate(peptidoform.parsed_sequence):
try:
matrix_hc[i, dict_aa[peptide_position[0]]] = 1.
except KeyError:
pass
logger.warning(
"Skipping the following (not in library): ", peptide_position[1])
except IndexError:
# Likely to be a sequence > 60 AA
pass
logger.warning(f"Could not add the following atom: {i} {peptide_position}")

if peptide_position[1] is not None:
try:
modification_composition = peptide_position[1][0].composition
except KeyError:
logger.debug(
logger.warning(
"Skipping the following (not in library): ", peptide_position[1])
continue

Expand All @@ -507,16 +512,19 @@ def rolling_sum(a, n=2):
matrix_pos[i - seq_len, dict_index_pos[atom_position_composition]] += atom_change
except KeyError:
try:
logger.debug(f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part")
logger.warning(f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part")
atom_position_composition = sub("\[.*?\]", "", atom_position_composition)
matrix[i, dict_index[atom_position_composition]] += atom_change
if i in positions:
matrix_pos[i, dict_index_pos[atom_position_composition]] += atom_change
elif i - seq_len in positions:
matrix_pos[i - seq_len, dict_index_pos[atom_position_composition]] += atom_change
except KeyError:
logger.debug(f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored")
logger.warning(f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored")
continue
except IndexError:
logger.warning(f"Could not add the following atom: {i} {atom_position_composition} {atom_change}")


matrix_all = np.sum(matrix, axis=0)
matrix_all = np.append(matrix_all, seq_len)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name='deeplc',
version='2.2.16',
version='2.2.17',
license='apache-2.0',
description='DeepLC: Retention time prediction for (modified) peptides using Deep Learning.',
long_description=LONG_DESCRIPTION,
Expand Down

0 comments on commit afc89fb

Please sign in to comment.