Skip to content

Commit

Permalink
Added get_all_tokens function
Browse files Browse the repository at this point in the history
  • Loading branch information
picciama committed Aug 9, 2024
1 parent 04bec44 commit 3babf78
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
11 changes: 10 additions & 1 deletion spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import difflib
import re
from itertools import combinations, repeat
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Set, Tuple, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -342,6 +342,15 @@ def split_modstring(sequence: str, r_pattern):
return map(split_modstring, sequences, repeat(regex_pattern))


def get_all_tokens(sequences: List[str]) -> Set[str]:
    """
    Collect the distinct tokens occurring in sequences given in UNIMOD ProForma notation.

    A token is one of the 20 upper-case residue letters listed in the pattern, optionally
    followed directly by a ``[UNIMOD:<digits>]`` tag. Characters that do not take part in
    such a match are skipped without raising.

    :param sequences: modified peptide sequences to scan
    :return: set of every token found in any of the sequences
    """
    token_regex = re.compile(r"[ACDEFGHIKLMNPQRSTVWY](\[UNIMOD:\d+\])?")
    return {hit.group(0) for peptide in sequences for hit in token_regex.finditer(peptide)}


def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]):
"""
Generate different peptide sequences with moving the modification to all possible residues.
Expand Down
7 changes: 7 additions & 0 deletions tests/unit_tests/test_mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,13 @@ def test_parse_modstrings_invalid_with_filtering(self):
invalid_seq = "testing"
self.assertEqual(next(mod.parse_modstrings([invalid_seq], alphabet=c.ALPHABET, filter=True)), [0])

def test_get_all_tokens(self):
    """Check that get_all_tokens returns the distinct tokens found across several UNIMOD sequences."""
    sequences = ["ACKC[UNIMOD:4]AD", "PEPTIDE", "PEM[UNIMOD:35]"]
    # Modified and unmodified forms of a residue count as separate tokens (e.g. "C" and "C[UNIMOD:4]").
    expected_tokens = {"A", "C", "C[UNIMOD:4]", "D", "E", "I", "K", "M[UNIMOD:35]", "P", "T"}

    self.assertEqual(mod.get_all_tokens(sequences), expected_tokens)


class TestCustomToInternal(unittest.TestCase):
"""Class to test custom to internal."""
Expand Down

0 comments on commit 3babf78

Please sign in to comment.