Skip to content

Commit

Permalink
Fixed doc string for sage to internal.
Browse files Browse the repository at this point in the history
  • Loading branch information
“Karim committed Nov 10, 2023
1 parent 8c04d71 commit 0382948
Showing 1 changed file with 38 additions and 29 deletions.
67 changes: 38 additions & 29 deletions spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,50 @@
from itertools import repeat
from typing import Dict, List, Optional, Tuple

from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS , MOD_MASSES_SAGE
from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_MASSES_SAGE, MOD_NAMES, SPECTRONAUT_MODS

def sage_to_internal(sequences: List[str]) -> List[str]:
    """
    Convert mod string from sage to the internal format.

    This function converts sequences using the mass change of a modification in
    square brackets as done by Sage to the internal format by replacing the mass
    shift with the corresponding UNIMOD identifier of known and supported
    modifications defined in the constants.

    Every bracketed mass shift in a sequence is processed, not only the first
    one. Shifts without an entry in MOD_MASSES_SAGE, and bracketed text that is
    not a signed decimal number, are left untouched.

    :param sequences: A list of sequences with values inside square brackets.
    :return: A list of modified sequences with values converted to internal format.
    """
    # A signed decimal number in square brackets, like [+1.0] or [-2.0].
    # Compiled once so the same pattern object is reused for every sequence.
    pattern = re.compile(r"\[([\+\-]\d+\.\d+)\]")

    def replace(match: re.Match) -> str:
        # MOD_MASSES_SAGE is looked up with the shift parsed as a float; when
        # there is no entry, fall back to the full original "[...]" text so
        # unknown modifications pass through unchanged.
        return MOD_MASSES_SAGE.get(float(match.group(1)), match.group(0))

    return [pattern.sub(replace, sequence) for sequence in sequences]


def internal_to_spectronaut(sequences: List[str]) -> List[str]:
"""
Function to translate a modstring from the internal format to the spectronaut format.
Expand Down Expand Up @@ -270,4 +280,3 @@ def get_mods_list(mods_variable: str, mods_fixed: str):
return mods_variable.split(";")
else:
return mods_variable.split(";") + mods_fixed.split(";")

0 comments on commit 0382948

Please sign in to comment.