Skip to content

Commit

Permalink
Accommodating for the internal rescoring pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
arminsl committed Jan 5, 2024
1 parent ae40d8b commit 8b68795
Showing 1 changed file with 31 additions and 1 deletion.
32 changes: 31 additions & 1 deletion spectrum_fundamentals/metrics/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ def __init__(
self.all_features_flag = all_features_flag
self.regression_method = regression_method
self.fdr_cutoff = fdr_cutoff
self.internal = True # TODO: Get this parameter from the caller

self._resolve_percolator_compatibility(percolator_version)
super().__init__(pred_intensities, true_intensities, mz)

@staticmethod
Expand Down Expand Up @@ -156,6 +159,29 @@ def get_aligned_predicted_retention_times(

return aligned_rts_predicted

@staticmethod
def get_scannr(metadata_subset: Union[pd.Series, Tuple[str, int]]) -> int:
    """
    Derive an integer scan identifier from a raw file name and a scan number.

    The two values are formatted back to back into one string, UTF-8 encoded and
    hashed with SHA-224. Only the first six hex digits of the digest are kept, so
    the result is a 24-bit integer and distinct inputs may collide.

    :param metadata_subset: two-element iterable of (raw_file, scan_number)
    :return: integer in the range [0, 16**6) built from the truncated digest
    """
    file_name, scan = metadata_subset
    payload = "{}{}".format(file_name, scan).encode()
    digest_hex = hashlib.sha224(payload).hexdigest()
    # Keep only the leading six hex characters (24 bits) of the digest.
    leading_hex = digest_hex[:6]
    return int(leading_hex, base=16)

@staticmethod
def get_scannr_internal(metadata_subset: Union[pd.Series, Tuple[str, int], int]) -> int:
    """
    Return the scan event number as a plain integer to use as scan number in percolator.

    Unlike get_scannr, no hashing is performed: the given value is only converted to int.

    When used with ``DataFrame.apply(..., axis=1)`` on a single-column frame, each row
    arrives as a one-element ``pd.Series``. That element is extracted explicitly, because
    calling ``int()`` directly on a single-element Series is deprecated in recent pandas
    versions.

    :param metadata_subset: the scan event number, either as a scalar or as a
        one-element pd.Series holding it
    :raises TypeError: if a pd.Series with a length other than one is given
    :return: the scan event number converted to int
    """
    scan_event_number = metadata_subset
    if isinstance(scan_event_number, pd.Series):
        if len(scan_event_number) != 1:
            # Same exception type that int(Series) raises for multi-element input.
            raise TypeError(
                f"expected a single scan event number, got a Series of length {len(scan_event_number)}"
            )
        # Positional access so that the row's column label does not matter.
        scan_event_number = scan_event_number.iloc[0]
    return int(scan_event_number)

@staticmethod
def get_delta_score(scores_df: pd.DataFrame, scoring_feature: str) -> np.ndarray:
"""
Expand Down Expand Up @@ -270,7 +296,11 @@ def add_percolator_metadata_columns(self):
spec_id_cols.append("SCAN_EVENT_NUMBER")
self.metrics_val["SpecId"] = self.metadata[spec_id_cols].apply(Percolator.get_specid, axis=1)
self.metrics_val["Label"] = self.target_decoy_labels
self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"]

if self.internal:
self.metrics_val["ScanNr"] = self.metadata[["SCAN_EVENT_NUMBER"]].apply(Percolator.get_scannr_internal, axis=1)
else:
self.metrics_val["ScanNr"] = self.metadata["SCAN_NUMBER"]
self.metrics_val["filename"] = self.metadata["RAW_FILE"]
self.metrics_val["Peptide"] = self.metadata["MODIFIED_SEQUENCE"].apply(lambda x: "_." + x + "._")

Expand Down

0 comments on commit 8b68795

Please sign in to comment.