Skip to content

Commit

Permalink
Merge pull request #135 from wilhelm-lab/patch/0.7.3
Browse files Browse the repository at this point in the history
Patch/0.7.3
  • Loading branch information
picciama authored Aug 13, 2024
2 parents 6f0465f + 9a8c0ae commit 040d3c5
Show file tree
Hide file tree
Showing 18 changed files with 2,500 additions and 227 deletions.
2 changes: 1 addition & 1 deletion .cookietemple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu
email: victor.giurcoiu@tum.de
project_name: spectrum_fundamentals
project_short_description: Fundamentals public repo
version: 0.7.2
version: 0.7.3
license: MIT
4 changes: 2 additions & 2 deletions .github/release-drafter.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name-template: "0.7.2 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.7.2 # <<COOKIETEMPLE_FORCE_BUMP>>
name-template: "0.7.3 🌈" # <<COOKIETEMPLE_FORCE_BUMP>>
tag-template: 0.7.3 # <<COOKIETEMPLE_FORCE_BUMP>>
exclude-labels:
- "skip-changelog"

Expand Down
2 changes: 1 addition & 1 deletion cookietemple.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.7.2
current_version = 0.7.3

[bumpversion_files_whitelisted]
init_file = spectrum_fundamentals/__init__.py
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
# the built documents.
#
# The short X.Y version.
version = "0.7.2"
version = "0.7.3"
# The full version, including alpha/beta/rc tags.
release = "0.7.2"
release = "0.7.3"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "spectrum_fundamentals"
version = "0.7.2" # <<COOKIETEMPLE_FORCE_BUMP>>
version = "0.7.3" # <<COOKIETEMPLE_FORCE_BUMP>>
description = "Fundamental functions, annotation pipeline and constants for oktoberfest"
authors = ["Wilhelmlab at Technical University of Munich"]
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = "Mario Picciani"
__email__ = "mario.picciani@tum.de"
__version__ = "0.7.2"
__version__ = "0.7.3"

import logging
import logging.handlers
Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@click.command()
@click.version_option(version="0.7.2", message=click.style("spectrum_fundamentals Version: 0.7.2"))
@click.version_option(version="0.7.3", message=click.style("spectrum_fundamentals Version: 0.7.3"))
def main() -> None:
"""spectrum_fundamentals."""

Expand Down
2 changes: 1 addition & 1 deletion spectrum_fundamentals/annotation/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def generate_annotation_matrix(
exp_mass_col = matched_peaks.columns.get_loc("exp_mass")

for peak in matched_peaks.values:
ion_type_index = ion_types.index(peak[ion_type][0])
ion_type_index = ion_types.index(peak[ion_type].split("-", 1)[0])
peak_pos = ((peak[no_col] - 1) * charge_const * len(ion_types)) + (peak[charge_col] - 1) + 3 * ion_type_index

if peak_pos >= constants.VEC_LENGTH:
Expand Down
85 changes: 70 additions & 15 deletions spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
VEC_LENGTH_CMS2 = (SEQ_LEN - 1) * 2 * 3 * 2
# peptide of length 30 can have 29 b, y, b_short, y_short, b_long and y_long ions, each with charge 1+, 2+ and 3+
# we do not annotate fragments with charge 3+. All fragments with charge 3+ convert to -1


#############
# ALPHABETS #
#############
Expand Down Expand Up @@ -380,36 +382,89 @@
"[UNIMOD:35]": "[Oxidation (O)]",
}

# Integer code assigned to each fragmentation method name (keys are upper-case method names).
FRAGMENTATION_ENCODING = {"HCD": 2, "CID": 1}
FRAGMENTATION_ENCODING = {
"CID": 1,
"HCD": 2,
"ETD": 3,
"ETHCD": 4,
"ETCID": 5,
"UVPD": 6,
"EID": 7,
"ECD": 8,
"AIECD": 9,
}

########################
# RESCORING PARAMETERS #
########################


class RescoreType(Enum):
    """Rescoring types, each identified by a lowercase string value."""

    PROSIT = "prosit"
    ANDROMEDA = "andromeda"

#############
# ION TYPES #
#############
# Ion type labels, split into a forward and a backwards list.
# IONS is the forward list followed by the backwards list.
FORWARD_IONS = ["a", "b", "c"]
BACKWARDS_IONS = ["x", "y", "z", "z_r"]
IONS = FORWARD_IONS + BACKWARDS_IONS

# Ion types per fragmentation method, listed as alternating backwards/forward pairs.
# Looked up by retrieve_ion_types in fragments.py.
FRAGMENTATION_TO_IONS_BY_PAIRS = {
"HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b
"CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b
"ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c
"ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c
"ETHCD": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c
"ETCID": [BACKWARDS_IONS[1], FORWARD_IONS[1], BACKWARDS_IONS[-1], FORWARD_IONS[2]], # y,b,z_r,c
# NOTE(review): the entries below evaluate to x,a,y,b,z,c, while the if/elif chain this
# table replaces returned y,b,z,c,x,a for UVPD -- confirm the new order is intended.
"UVPD": [
BACKWARDS_IONS[0],
FORWARD_IONS[0],
BACKWARDS_IONS[1],
FORWARD_IONS[1],
BACKWARDS_IONS[2],
FORWARD_IONS[2],
], # x,a,y,b,z,c
}

# Ion types per fragmentation method, with all backwards ions listed before the forward ions.
# Looked up by retrieve_ion_types_for_peak_initialization in fragments.py.
FRAGMENTATION_TO_IONS_BY_DIRECTION = {
"HCD": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b
"CID": [BACKWARDS_IONS[1], FORWARD_IONS[1]], # y,b
"ETD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c
"ECD": [BACKWARDS_IONS[-1], FORWARD_IONS[2]], # z_r,c
"ETHCD": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c
"ETCID": [BACKWARDS_IONS[1], BACKWARDS_IONS[-1]] + FORWARD_IONS[1:], # y,z_r,b,c
"UVPD": BACKWARDS_IONS[:-1] + FORWARD_IONS, # x,y,z,a,b,c
}

# Mass delta per ion type, built from the element masses in ATOM_MASSES.
# Looked up by get_ion_delta in fragments.py.
ION_DELTAS = {
"a": -ATOM_MASSES["O"] - ATOM_MASSES["C"],
"b": 0.0,
"c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"],
"x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"],
"y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"],
"z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"],
"z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"],
}

############################
# GENERATION OF ANNOTATION #
############################

# Value ranges that the annotation lists below are generated from.
IONS = ["y", "b"] # limited to single character unicode string when array is created
CHARGES = [1, 2, 3] # limited to uint8 (0-255) when array is created
POSITIONS = [x for x in range(1, 30)] # fragment numbers 1-29 -- limited to uint8 (0-255) when array is created

# Three parallel lists that receive one entry per combination; the loops run over
# positions, then ion types, then charges.
ANNOTATION_FRAGMENT_TYPE = []
ANNOTATION_FRAGMENT_CHARGE = []
ANNOTATION_FRAGMENT_NUMBER = []
for pos in POSITIONS:
for ion in IONS:
for ion in FRAGMENTATION_TO_IONS_BY_DIRECTION["HCD"]:
for charge in CHARGES:
ANNOTATION_FRAGMENT_TYPE.append(ion)
ANNOTATION_FRAGMENT_CHARGE.append(charge)
ANNOTATION_FRAGMENT_NUMBER.append(pos)

# ANNOTATION bundles the three lists in the order [types, charges, numbers].
ANNOTATION = [ANNOTATION_FRAGMENT_TYPE, ANNOTATION_FRAGMENT_CHARGE, ANNOTATION_FRAGMENT_NUMBER]


########################
# RESCORING PARAMETERS #
########################


class RescoreType(Enum):
    """Rescoring types, each identified by a lowercase string value."""

    PROSIT = "prosit"
    ANDROMEDA = "andromeda"
119 changes: 74 additions & 45 deletions spectrum_fundamentals/fragments.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import itertools
import logging
import re
from operator import itemgetter
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Literal, Optional, Tuple, Union

import numpy as np
import pandas as pd

from .constants import AA_MASSES, ATOM_MASSES, MOD_MASSES, PARTICLE_MASSES
import spectrum_fundamentals.constants as c

from .mod_string import internal_without_mods

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -38,7 +40,7 @@ def _get_modifications(peptide_sequence: str, custom_mods: Optional[Dict[str, fl
pattern = re.compile(r"\[.{8}[^\]]*\]")
matches = pattern.finditer(peptide_sequence)

mod_masses = MOD_MASSES | (custom_mods or {})
mod_masses = c.MOD_MASSES | (custom_mods or {})

for match in matches:
start_pos, end_pos = match.span()
Expand All @@ -56,14 +58,14 @@ def compute_peptide_mass(sequence: str, custom_mods: Optional[Dict[str, float]]
:param custom_mods: Custom Modifications with the identifier, the unimod equivalent and the respective mass
:return: Theoretical mass of the sequence
"""
terminal_masses = 2 * ATOM_MASSES["H"] + ATOM_MASSES["O"] # add terminal masses HO- and H-
terminal_masses = 2 * c.ATOM_MASSES["H"] + c.ATOM_MASSES["O"] # add terminal masses HO- and H-

modification_deltas = _get_modifications(sequence, custom_mods=custom_mods)
if modification_deltas: # there were modifications
sequence = internal_without_mods([sequence])[0]
terminal_masses += modification_deltas.get(-2, 0.0) # prime with n_term_mod delta if present

peptide_sum = sum([AA_MASSES[c] + modification_deltas.get(i, 0.0) for i, c in enumerate(sequence)])
peptide_sum = sum([c.AA_MASSES[aa] + modification_deltas.get(i, 0.0) for i, aa in enumerate(sequence)])

return terminal_masses + peptide_sum

Expand All @@ -90,21 +92,15 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]:
Given the fragmentation method the function returns all ion types that can result from it.
: param fragmentation_method: fragmentation method used during the MS
: raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
: return: list of possible ion types
:param fragmentation_method: fragmentation method used during the MS
:raises ValueError: if fragmentation_method is not supported
:return: list of possible ion types
"""
fragmentation_method = fragmentation_method.upper()
if fragmentation_method == "HCD" or fragmentation_method == "CID":
return ["y", "b"]
elif fragmentation_method == "ETD" or fragmentation_method == "ECD":
return ["z", "c"]
elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD":
return ["y", "b", "z", "c"]
elif fragmentation_method == "UVPD":
return ["y", "b", "z", "c", "x", "a"]
else:
ions = c.FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, [])
if not ions:
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
return ions


def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> List[str]:
Expand All @@ -113,21 +109,15 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis
Given the fragmentation method the function returns all ion types that can result from it.
: param fragmentation_method: fragmentation method used during the MS
: raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
: return: list of possible ion types
:param fragmentation_method: fragmentation method used during the MS
:raises ValueError: if fragmentation_method is not supported
:return: list of possible ion types
"""
fragmentation_method = fragmentation_method.upper()
if fragmentation_method == "HCD" or fragmentation_method == "CID":
return ["y", "b"]
elif fragmentation_method == "ETD" or fragmentation_method == "ECD":
return ["z", "c"]
elif fragmentation_method == "ETCID" or fragmentation_method == "ETHCD":
return ["y", "z", "b", "c"]
elif fragmentation_method == "UVPD":
return ["x", "y", "z", "a", "b", "c"]
else:
ions = c.FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, [])
if not ions:
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
return ions


def get_ion_delta(ion_types: List[str]) -> np.ndarray:
Expand All @@ -137,18 +127,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray:
:param ion_types: type of ions for which mass should be calculated
:return: numpy array with masses of the ions
"""
ion_type_offsets = {
"a": -ATOM_MASSES["O"] - ATOM_MASSES["C"],
"b": 0.0,
"c": 3 * ATOM_MASSES["H"] + ATOM_MASSES["N"],
"x": 2 * ATOM_MASSES["O"] + ATOM_MASSES["C"],
"y": ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"],
"z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"],
}
# I think list comprehension is fastest way
deltas = np.array([ion_type_offsets[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)

return deltas
return np.array([c.ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)


def initialize_peaks(
Expand Down Expand Up @@ -196,13 +175,13 @@ def initialize_peaks(

if modification_deltas: # there were modifications
sequence = internal_without_mods([sequence])[0]
n_term_delta = modification_deltas.get(-2, 0.0)
n_term_delta = modification_deltas.pop(-2, 0.0) # directly pop it to avoid readding it later
if n_term_delta != 0:
n_term_mod = 2
# add n_term mass to first aa for easy processing in the following calculation
modification_deltas[0] = modification_deltas.get(0, 0.0) + n_term_delta

mass_arr = np.array([AA_MASSES[_] for _ in sequence])
mass_arr = np.array([c.AA_MASSES[_] for _ in sequence])
for pos, mod_mass in modification_deltas.items():
mass_arr[pos] += mod_mass

Expand All @@ -221,7 +200,7 @@ def initialize_peaks(
# calculate for m/z for charges 1, 2, 3
# shape of ion_mzs: (n_ions, n_fragments, max_charge)
charges = np.arange(1, max_charge + 1)
ion_mzs = (sum_array[..., np.newaxis] + charges * PARTICLE_MASSES["PROTON"]) / charges
ion_mzs = (sum_array[..., np.newaxis] + charges * c.PARTICLE_MASSES["PROTON"]) / charges

min_mzs, max_mzs = get_min_max_mass(mass_analyzer, ion_mzs, mass_tolerance, unit_mass_tolerance)

Expand All @@ -246,7 +225,7 @@ def initialize_peaks(
fragments_meta_data,
n_term_mod,
sequence,
(peptide_mass + ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"]),
(peptide_mass + c.ATOM_MASSES["O"] + 2 * c.ATOM_MASSES["H"]),
)


Expand Down Expand Up @@ -422,3 +401,53 @@ def get_min_max_mass(
else:
raise ValueError(f"Unsupported mass_analyzer: {mass_analyzer}")
return (min_mass, max_mass)


# Names of the three components of a fragment ion annotation; used to type the
# `order` argument of generate_fragment_ion_annotations.
FragmentIonComponent = Literal["ion_type", "position", "charge"]


def generate_fragment_ion_annotations(
    ion_types: List[str], order: Tuple[FragmentIonComponent, FragmentIonComponent, FragmentIonComponent]
) -> List[Tuple[str, int, int]]:
    """Enumerate every (ion_type, position, charge) combination in a caller-defined nesting order.

    The component values are combined with a cartesian product: the first entry of `order` varies
    slowest and the last entry varies fastest. Each returned tuple is always laid out as
    (ion_type, position, charge), independent of `order`.

    :param ion_types: ion types to enumerate; must be non-empty and free of duplicates
    :param order: the component names "ion_type", "position" and "charge" in the desired nesting order
    :return: list of (ion_type, position, charge) tuples in the requested order
    :raises ValueError: if ion_types is empty or contains duplicates, or if order does not consist of
        exactly the three component names
    """
    # Validate the arguments up front; the checks run in the same sequence as before so that
    # a given bad input keeps producing the same error.
    if len(ion_types) != len(set(ion_types)):
        raise ValueError("Redundant ion types specified")
    if len(ion_types) == 0:
        raise ValueError("No ion types specified")
    if {"ion_type", "position", "charge"} != set(order):
        raise ValueError("Duplicate component used for ordering fragment ions")

    values_by_component = {
        "ion_type": ion_types,
        "position": c.POSITIONS,
        "charge": c.CHARGES,
    }

    # Where each component sits inside a product tuple; resolved once instead of per combination.
    type_idx = order.index("ion_type")
    position_idx = order.index("position")
    charge_idx = order.index("charge")

    combinations = itertools.product(*(values_by_component[name] for name in order))
    return [(str(combo[type_idx]), int(combo[position_idx]), int(combo[charge_idx])) for combo in combinations]


def format_fragment_ion_annotation(raw_annotation: Tuple[str, int, int]) -> str:
    """Render a fragment ion tuple as an annotation string such as "y3+2".

    :param raw_annotation: `(ion_type, position, charge)` tuple
    :returns: string of the form <ion_type><position>+<charge>
    """
    kind, number, z = raw_annotation
    return f"{kind}{number}+{z}"
Loading

0 comments on commit 040d3c5

Please sign in to comment.