Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/de duplication #84

Open
wants to merge 6 commits into
base: development
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions spectrum_fundamentals/metrics/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from scipy import interpolate
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

import spectrum_fundamentals.constants as cs

from . import fragments_ratio as fr
from . import similarity as sim
from .metric import Metric
Expand Down Expand Up @@ -40,6 +42,7 @@ class Percolator(Metric):
FRAGMENTATION: fragmentation method, e.g. HCD, CID
RETENTION_TIME: observed retention time
PREDICTED_RETENTION_TIME: predicted retention time by Prosit
PROTEINS: protein identifiers, written to the Percolator "Proteins" column
"""

metadata: pd.DataFrame
Expand Down Expand Up @@ -272,11 +275,16 @@ def add_percolator_metadata_columns(self):
self.metrics_val["Label"] = self.target_decoy_labels
self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"]
self.metrics_val["filename"] = self.metadata["RAW_FILE"]
# self.metrics_val.insert(self.metrics_val.columns.get_loc("filename") + 1, "ExpMass", self.metrics_val.pop("ExpMass"))
self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._")
# added a variable for proton mass
proton_mass = cs.PARTICLE_MASSES["PROTON"]
# added theoretical/expected mass-to-charge (m/z) column, which accounts for the precursor charge

self.metrics_val["Proteins"] = self.metadata[
"MODIFIED_SEQUENCE"
] # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence
self.metrics_val["ExpMass"] = (
self.metadata["CALCULATED_MASS"] + proton_mass * self.metadata["PRECURSOR_CHARGE"]
) / self.metadata["PRECURSOR_CHARGE"]
self.metrics_val["Proteins"] = self.metadata["PROTEINS"]

def apply_lda_and_get_indices_below_fdr(
self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01
Expand Down Expand Up @@ -371,7 +379,7 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray:

def _reorder_columns_for_percolator(self):
all_columns = self.metrics_val.columns
first_columns = ["SpecId", "Label", "ScanNr", "filename"]
first_columns = ["SpecId", "Label", "ScanNr", "filename", "ExpMass"]
last_columns = ["Peptide", "Proteins"]
mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns))
new_columns = first_columns + sorted(mid_columns) + last_columns
Expand Down
Loading