Skip to content

Commit

Permalink
prepare for CCS prediction
Browse files (browse the repository at this point in the history)
  • Loading branch information
RobbinBouwmeester committed Nov 9, 2023
1 parent df28ea6 commit daa1e46
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
13 changes: 12 additions & 1 deletion deeplc/deeplc.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ class DeepLC:

library = {}

# TODO have a CCS flag here
def __init__(
self,
main_path=os.path.dirname(os.path.realpath(__file__)),
Expand Down Expand Up @@ -314,13 +315,14 @@ def do_f_extraction(self, seqs, mods, identifiers, charges=[]):
feature matrix
"""
list_of_psms = []
# TODO include charge here
if len(charges) > 0:
for seq,mod,ident in zip(seqs,mods,identifiers):
list_of_psms.append(PSM(peptide=peprec_to_proforma(seq,mod),spectrum_id=ident))
else:
for seq,mod,ident,z in zip(seqs,mods,identifiers,charges):
list_of_psms.append(PSM(peptide=peprec_to_proforma(seq,mod),spectrum_id=ident))

# TODO include charge here
psm_list = PSMList(psm_list=list_of_psms)

return self.f_extractor.full_feat_extract(psm_list)
Expand Down Expand Up @@ -352,6 +354,7 @@ def do_f_extraction_pd(self,
else:
for seq,mod,ident,z in zip(df_instances["seq"],df_instances["modifications"],df_instances.index,charges=df_instances["charges"]):
list_of_psms.append(PSM(peptide=peprec_to_proforma(seq,mod),spectrum_id=ident))
# TODO include charge here
psm_list = PSMList(psm_list=list_of_psms)

return self.f_extractor.full_feat_extract(psm_list)
Expand Down Expand Up @@ -627,12 +630,14 @@ def make_preds(self,
"""
if type(seq_df) == pd.core.frame.DataFrame:
list_of_psms = []
# TODO include charge here
for seq,mod,ident in zip(seq_df["seq"],seq_df["modifications"],seq_df.index):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident))
psm_list = PSMList(psm_list=list_of_psms)

if len(infile) > 0:
psm_list = read_file(infile)
# TODO is charge included here?
if "msms" in infile and ".txt" in infile:
mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
psm_list.rename_modifications(mapper)
Expand Down Expand Up @@ -703,6 +708,7 @@ def calibrate_preds_func_pygam(self,
# TODO make sure either psm_list or seq_df is supplied
if type(seq_df) == pd.core.frame.DataFrame:
list_of_psms = []
# TODO include charge here
for seq,mod,ident,tr in zip(seq_df["seq"],seq_df["modifications"],seq_df.index,seq_df["tr"]):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident,retention_time=tr))
psm_list = PSMList(psm_list=list_of_psms)
Expand Down Expand Up @@ -780,6 +786,7 @@ def calibrate_preds_func(self,
"""
if type(seq_df) == pd.core.frame.DataFrame:
list_of_psms = []
# TODO include charge here
for seq,mod,tr,ident in zip(seq_df["seq"],seq_df["modifications"],seq_df["tr"],seq_df.index):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident,retention_time=tr))
psm_list = PSMList(psm_list=list_of_psms)
Expand Down Expand Up @@ -926,16 +933,19 @@ def calibrate_preds(self,
"""
if type(seq_df) == pd.core.frame.DataFrame:
list_of_psms = []
# TODO include charge here
for seq,mod,ident,tr in zip(seq_df["seq"],seq_df["modifications"],seq_df.index,seq_df["tr"]):
list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident,retention_time=tr))
psm_list = PSMList(psm_list=list_of_psms)
elif psm_utils_obj:
# TODO include charge here
psm_list = psm_utils_obj

if isinstance(self.model, str):
self.model = [self.model]

if len(infile) > 0:
# TODO include charge here
psm_list = read_file(infile)
if "msms" in infile and ".txt" in infile:
mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
Expand Down Expand Up @@ -995,6 +1005,7 @@ def calibrate_preds(self,
self.model = models

if isinstance(sample_for_calibration_curve, int):
# TODO include charge here
psm_list = random.sample(list(psm_list), sample_for_calibration_curve)
measured_tr = [psm.retention_time for psm in psm_list]

Expand Down
7 changes: 5 additions & 2 deletions deeplc/feat_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ def encode_atoms(self,
object :: pd.DataFrame
feature matrix (np.matrix) of summed composition
"""
# TODO param flag for CCS prediction
def rolling_sum(a, n=2):
ret = np.cumsum(a, axis=1, dtype=np.float32)
ret[:, n:] = ret[:, n:] - ret[:, :-n]
Expand All @@ -429,7 +430,7 @@ def rolling_sum(a, n=2):
ret_list["pos_matrix"] = {}
ret_list["matrix_hc"] = {}

# Reintroduce for CCS
# TODO Reintroduce for CCS, check CCS flag
#if len(charges) == 0:
# charges = [-1] * len(indexes)

Expand Down Expand Up @@ -541,7 +542,7 @@ def rolling_sum(a, n=2):
matrix_all = np.sum(matrix, axis=0)
matrix_all = np.append(matrix_all, seq_len)

# Reintroduce for CCS
# TODO Reintroduce for CCS, check CCS flag
#if charge != -1:
# matrix_all = np.append(matrix_all,(seq.count("H"))/float(seq_len))
# matrix_all = np.append(matrix_all,(seq.count("F")+seq.count("W")+seq.count("Y"))/float(seq_len))
Expand Down Expand Up @@ -584,6 +585,7 @@ def full_feat_extract(self,
pd.DataFrame
feature matrix
"""
# TODO Reintroduce for CCS, check CCS flag
if len(seqs) > 0:
list_of_psms = []
for seq,mod,id in zip(seqs,mods,identifiers):
Expand All @@ -593,6 +595,7 @@ def full_feat_extract(self,
if self.verbose:
t0 = time.time()

# TODO pass CCS flag
if self.add_sum_feat:
if self.verbose:
logger.debug(
Expand Down

0 comments on commit daa1e46

Please sign in to comment.