From 20086953bc2705f30e96aa422e51ae4d3504a676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Mon, 22 Jan 2024 14:43:41 +0100 Subject: [PATCH 1/6] some issues fixed --- spectrum_fundamentals/metrics/percolator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index 3a71fba..44adda7 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -40,6 +40,7 @@ class Percolator(Metric): FRAGMENTATION: fragmentation method, e.g. HCD, CID RETENTION_TIME: observed retention time PREDICTED_RETENTION_TIME: predicted retention time by Prosit + PROTEINS """ metadata: pd.DataFrame @@ -272,11 +273,13 @@ def add_percolator_metadata_columns(self): self.metrics_val["Label"] = self.target_decoy_labels self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"] self.metrics_val["filename"] = self.metadata["RAW_FILE"] + # here you should ExpMass + self.metrics_val["ExpMass"] = self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") self.metrics_val["Proteins"] = self.metadata[ - "MODIFIED_SEQUENCE" - ] # we don't need the protein ID to get PSM / peptide results, fill with peptide sequence + "PROTEINS" + ] # added proteins column for de-duplication purposes def apply_lda_and_get_indices_below_fdr( self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01 From bcb2adabf10bfff1ef788919562bf682c2cfe650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Mon, 22 Jan 2024 14:44:56 +0100 Subject: [PATCH 2/6] added in percolator some lines for protein column --- spectrum_fundamentals/metrics/percolator.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index 44adda7..19f1f55 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -273,13 +273,8 @@ def add_percolator_metadata_columns(self): self.metrics_val["Label"] = self.target_decoy_labels self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"] self.metrics_val["filename"] = self.metadata["RAW_FILE"] - # here you should ExpMass - self.metrics_val["ExpMass"] = self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") - - self.metrics_val["Proteins"] = self.metadata[ - "PROTEINS" - ] # added proteins column for de-duplication purposes + self.metrics_val["Proteins"] = self.metadata["PROTEINS"] # added proteins column for de-duplication purposes def apply_lda_and_get_indices_below_fdr( self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01 From 98c8212a3a7cd9064f46e18e032b0306e637a271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Mon, 22 Jan 2024 14:53:00 +0100 Subject: [PATCH 3/6] added some stuff --- spectrum_fundamentals/metrics/percolator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index 19f1f55..9669d37 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -10,6 +10,9 @@ from scipy import interpolate from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +import spectrum_fundamentals +from spectrum_fundamentals import constants + from . import fragments_ratio as fr from . import similarity as sim from .metric import Metric @@ -273,6 +276,14 @@ def add_percolator_metadata_columns(self): self.metrics_val["Label"] = self.target_decoy_labels self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"] self.metrics_val["filename"] = self.metadata["RAW_FILE"] + # added a variable for proton mass + proton_mass = spectrum_fundamentals.constants.PARTICLE_MASSES["PROTON"] + # added theorictical/expected (mass/charge) column including the charge + + self.metrics_val["ExpMass"] = ( + self.metadata["CALCULATED_MASS"] + proton_mass * self.metadata["PRECURSOR_CHARGE"] + ) / self.metadata["PRECURSOR_CHARGE"] + self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") self.metrics_val["Proteins"] = self.metadata["PROTEINS"] # added proteins column for de-duplication purposes From fc0ad31779df607fa3882c734855fa34e1dc9b14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Mon, 22 Jan 2024 15:31:39 +0100 Subject: [PATCH 4/6] added expmass in correct order and calculation --- spectrum_fundamentals/metrics/percolator.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index 9669d37..b7c5b83 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -10,8 +10,7 @@ from scipy import interpolate from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -import spectrum_fundamentals -from spectrum_fundamentals import constants +import spectrum_fundamentals.constants as cs from . import fragments_ratio as fr from . import similarity as sim @@ -277,13 +276,13 @@ def add_percolator_metadata_columns(self): self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"] self.metrics_val["filename"] = self.metadata["RAW_FILE"] # added a variable for proton mass - proton_mass = spectrum_fundamentals.constants.PARTICLE_MASSES["PROTON"] + proton_mass = cs.PARTICLE_MASSES["PROTON"] # added theorictical/expected (mass/charge) column including the charge self.metrics_val["ExpMass"] = ( self.metadata["CALCULATED_MASS"] + proton_mass * self.metadata["PRECURSOR_CHARGE"] ) / self.metadata["PRECURSOR_CHARGE"] - + # self.metrics_val.insert(self.metrics_val.columns.get_loc("filename") + 1, "ExpMass", self.metrics_val.pop("ExpMass")) self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") self.metrics_val["Proteins"] = self.metadata["PROTEINS"] # added proteins column for de-duplication purposes @@ -380,7 +379,7 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray: def _reorder_columns_for_percolator(self): all_columns = self.metrics_val.columns - first_columns = ["SpecId", "Label", "ScanNr", "filename"] + first_columns = ["SpecId", "Label", "ScanNr", "filename", "ExpMass"] last_columns = ["Peptide", "Proteins"] mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns)) new_columns = first_columns + sorted(mid_columns) + last_columns From 4bea22ec1c6baf7dd0e8602d002eebc444fe6a8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Mon, 29 Jan 2024 10:04:41 +0100 Subject: [PATCH 5/6] added expmass after cid, added modified sequence in protein column --- spectrum_fundamentals/metrics/percolator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index b7c5b83..3e3ec96 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -275,6 +275,8 @@ def add_percolator_metadata_columns(self): self.metrics_val["Label"] = self.target_decoy_labels self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"] self.metrics_val["filename"] = self.metadata["RAW_FILE"] + # self.metrics_val.insert(self.metrics_val.columns.get_loc("filename") + 1, "ExpMass", self.metrics_val.pop("ExpMass")) + self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") # added a variable for proton mass proton_mass = cs.PARTICLE_MASSES["PROTON"] # added theorictical/expected (mass/charge) column including the charge @@ -282,9 +284,7 @@ def add_percolator_metadata_columns(self): self.metrics_val["ExpMass"] = ( self.metadata["CALCULATED_MASS"] + proton_mass * self.metadata["PRECURSOR_CHARGE"] ) / self.metadata["PRECURSOR_CHARGE"] - # self.metrics_val.insert(self.metrics_val.columns.get_loc("filename") + 1, "ExpMass", self.metrics_val.pop("ExpMass")) - self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._") - self.metrics_val["Proteins"] = self.metadata["PROTEINS"] # added proteins column for de-duplication purposes + self.metrics_val["Proteins"] = self.metadata["MODIFIED_SEQUENCE"] def apply_lda_and_get_indices_below_fdr( self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01 @@ -379,7 +379,7 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray: def _reorder_columns_for_percolator(self): all_columns = self.metrics_val.columns - first_columns = ["SpecId", "Label", "ScanNr", "filename", "ExpMass"] + first_columns = ["SpecId", "Label", "ScanNr", "filename", "CID", "ExpMass"] last_columns = ["Peptide", "Proteins"] mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns)) new_columns = first_columns + sorted(mid_columns) + last_columns From 1e7fa93f4ad25a1fd16275a695057535bdcaafb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CKarim?= <“karimwael48@gmail.com”> Date: Tue, 30 Jan 2024 11:57:21 +0100 Subject: [PATCH 6/6] added correct protein column and correct sorting for expmass --- spectrum_fundamentals/metrics/percolator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spectrum_fundamentals/metrics/percolator.py b/spectrum_fundamentals/metrics/percolator.py index 3e3ec96..78773ec 100644 --- a/spectrum_fundamentals/metrics/percolator.py +++ b/spectrum_fundamentals/metrics/percolator.py @@ -284,7 +284,7 @@ def add_percolator_metadata_columns(self): self.metrics_val["ExpMass"] = ( self.metadata["CALCULATED_MASS"] + proton_mass * self.metadata["PRECURSOR_CHARGE"] ) / self.metadata["PRECURSOR_CHARGE"] - self.metrics_val["Proteins"] = self.metadata["MODIFIED_SEQUENCE"] + self.metrics_val["Proteins"] = self.metadata["PROTEINS"] def apply_lda_and_get_indices_below_fdr( self, initial_scoring_feature: str = "spectral_angle", fdr_cutoff: float = 0.01 @@ -379,7 +379,7 @@ def fdrs_to_qvals(fdrs: np.ndarray) -> np.ndarray: def _reorder_columns_for_percolator(self): all_columns = self.metrics_val.columns - first_columns = ["SpecId", "Label", "ScanNr", "filename", "CID", "ExpMass"] + first_columns = ["SpecId", "Label", "ScanNr", "filename", "ExpMass"] last_columns = ["Peptide", "Proteins"] mid_columns = list(set(all_columns) - set(first_columns) - set(last_columns)) new_columns = first_columns + sorted(mid_columns) + last_columns