-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from BAMeScience/dev
Dev
- Loading branch information
Showing
16 changed files
with
349 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
**/.idea | ||
**/cmake-* | ||
**/build | ||
**/pBuild | ||
**/.vscode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
|
||
import pandas as pd | ||
import argparse | ||
import random | ||
from matplotlib import pyplot as plt | ||
|
||
def parse_args(argv=None):
    """Parse the command-line options of the target/decoy merge script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``, so existing callers that
            pass nothing keep their behavior.

    Returns:
        argparse.Namespace with the attributes ``target``, ``decoy``,
        ``output``, ``score``, ``update_delta_scores``, ``main_features``
        and ``drop_redundant_features``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--target",
                        help="target search results file (.pin) format",
                        type=str, required=True)
    parser.add_argument("-d", "--decoy",
                        help="decoy search results file (.pin) format",
                        type=str, required=True)
    parser.add_argument("-o", "--output",
                        help="output file (.pin) format",
                        type=str, required=True)
    parser.add_argument("--score",
                        help="discriminant score for target decoy competition",
                        type=str, default="avg_bias_adjusted_similarity")
    parser.add_argument("--update_delta_scores",
                        help="Update delta scores when target/decoy are 1st and 2nd ranked hit",
                        action='store_true')
    parser.add_argument("--main_features",
                        help="Track main features only. Otherwise all features will be tracked, which can reduce separation performance.",
                        action='store_true')
    # The original help text here was a copy of the --main_features help;
    # this option drops the x_score / x_score_dot columns from the output.
    parser.add_argument("--drop_redundant_features",
                        help="Drop the redundant features x_score and x_score_dot from the output.",
                        action='store_true')

    return parser.parse_args(argv)
|
||
|
||
def update_delta_scores(df1, idx1, df2, idx2):
    """Recompute the delta scores of one row against a competing row.

    Row ``idx1`` of ``df1`` is compared with row ``idx2`` of ``df2``. If the
    stored ``delta_avg`` of the first row is larger than the difference of
    the two rows' ``avg_bias_adjusted_similarity`` values, every delta column
    of the first row is overwritten with the difference of the corresponding
    score columns. Otherwise nothing is changed.

    Args:
        df1: DataFrame holding the row whose delta columns may be rewritten
            (modified in place).
        idx1: Index label of that row in ``df1``.
        df2: DataFrame holding the competing row (read only).
        idx2: Index label of the competing row in ``df2``.

    Returns:
        None.
    """
    avg_gap = (df1.at[idx1, "avg_bias_adjusted_similarity"]
               - df2.at[idx2, "avg_bias_adjusted_similarity"])
    if not df1.at[idx1, "delta_avg"] > avg_gap:
        return

    # (delta column, score column it is derived from); delta_avg stays last
    # to keep the original write order.
    delta_sources = (
        ("delta_similarity", "similarity"),
        ("delta_dot", "dot_product"),
        ("delta_annotation_similarity", "annotation_similarity"),
        ("delta_sim2", "sim2"),
        ("delta_avg", "avg_bias_adjusted_similarity"),
    )
    for delta_col, score_col in delta_sources:
        df1.at[idx1, delta_col] = df1.at[idx1, score_col] - df2.at[idx2, score_col]
|
||
def merge_files(args):
    """Run target-decoy competition on two .pin files and write the result.

    Both files are read as tab-separated tables (lines starting with ``#``
    are treated as comments). For every ``ScanNr`` of the target file that
    also occurs in the decoy file exactly one of the two matches survives:

    * if both peptides are equal after replacing ``L`` with ``I``, the decoy
      is dropped;
    * otherwise the target is kept only when its ``args.score`` value is
      strictly greater than the decoy's; on a tie or a lower score the
      decoy is kept and the target is dropped.

    Matches without a counterpart are kept. The surviving targets and decoys
    are concatenated and written to ``args.output`` as a tab-separated file.

    Args:
        args: Namespace with the attributes ``target``, ``decoy``, ``output``
            (file paths), ``score`` (column used for the competition),
            ``update_delta_scores``, ``main_features`` and
            ``drop_redundant_features`` (booleans).

    Raises:
        SystemExit: With status 1 if a target ``ScanNr`` occurs more than
            once in the decoy file.
    """
    print("+++ Merging target and decoy results (.pin format) +++")
    df = pd.read_csv(args.target, sep='\t', comment='#', low_memory=False)
    df_decoy = pd.read_csv(args.decoy, sep='\t', comment='#', low_memory=False)

    target_nans = df.isnull().any(axis=1).sum()
    decoy_nans = df_decoy.isnull().any(axis=1).sum()
    if target_nans > 0 or decoy_nans > 0:
        print(f"Warning: NaN values detected. Dropping {target_nans} target and {decoy_nans} decoy matches.")
        df.dropna(inplace=True)
        df_decoy.dropna(inplace=True)

    if not (df["Label"] == 1).all():
        print("Warning: Not all target labels match expected value of 1.")

    if not (df_decoy["Label"] == -1).all():
        print("Warning: Not all decoy labels match expected value of -1.")

    print(f"Detected {df.shape[0]} target and {df_decoy.shape[0]} decoy matches.")

    # Build the lookups once instead of scanning both frames for every scan.
    # Rows dropped inside the loop always belong to the scan currently being
    # processed, so these precomputed labels never go stale.
    decoy_rows_by_scan = df_decoy.groupby("ScanNr").groups
    first_targets = df["ScanNr"].drop_duplicates()  # first row of each scan

    for target_idx, num in first_targets.items():
        decoy_rows = decoy_rows_by_scan.get(num)
        if decoy_rows is None:
            continue
        if len(decoy_rows) > 1:
            print("Error: multiple occurance of a ScanNr")
            # Same exit status as before, without relying on the `exit`
            # helper that is only injected by the `site` module.
            raise SystemExit(1)
        decoy_idx = decoy_rows[0]

        # Equal peptide (L and I treated as the same residue) -> drop decoy
        target_peptide = df.at[target_idx, "Peptide"].replace("L", "I")
        decoy_peptide = df_decoy.at[decoy_idx, "Peptide"].replace("L", "I")
        if target_peptide == decoy_peptide:
            df_decoy.drop(decoy_idx, inplace=True)
            continue

        # Compare score -> keep the higher scoring match (decoy wins ties)
        if df.at[target_idx, args.score] > df_decoy.at[decoy_idx, args.score]:
            if args.update_delta_scores:
                update_delta_scores(df, target_idx, df_decoy, decoy_idx)
            df_decoy.drop(decoy_idx, inplace=True)
        else:
            if args.update_delta_scores:
                update_delta_scores(df_decoy, decoy_idx, df, target_idx)
            df.drop(target_idx, inplace=True)

    df = pd.concat([df, df_decoy], ignore_index=True)

    if args.main_features:
        features = [
            "charge", "similarity", "bias", "delta_similarity", "sim2",
            "delta_sim2", "annotation_similarity", "annotation_bias",
            "annotation_sim2", "delta_annotation_similarity",
            "peak_count_ref", "avg_bias_adjusted_similarity", "delta_avg",
            "abs_mass_difference", "ppm_difference", "peptide_length",
            "precursor_mz",
        ]
        col = ["PSMId", "Label", "ScanNr"] + features + ["Peptide", "Proteins"]
        df = df[col]
    if args.drop_redundant_features:
        # errors="ignore": with --main_features these columns are already
        # gone, which used to raise a KeyError when both flags were given.
        df = df.drop(columns=["x_score", "x_score_dot"], errors="ignore")
    df.to_csv(args.output, sep="\t", index=False)

    num_targets = int((df["Label"] == 1).sum())
    num_decoys = int((df["Label"] == -1).sum())

    print(f"Files merged successfully! {num_targets} targets and {num_decoys} decoys remaining after competition.")
|
||
|
||
|
||
def main():
    """Script entry point: parse the command line, then merge the files."""
    merge_files(parse_args())


# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||
|
||
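# Workaround for -inf values in a .pin file: replace them with a large | |
# negative number before further processing, for example: | |
# sed -i 's/-inf/-9999/g' yeast_td.pin |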
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
|
||
PSMId Label ScanNr charge similarity bias annotation_similarity annotation_bias avg_bias_adjusted_similarity dot_product delta_dot delta_similarity delta_annotation_similarity delta_sim2 delta_avg dot_contrast_angle similarity_contrast_angle annotation_contrast_angle mass_difference abs_mass_difference ppm_difference peptide_length precursor_mz peak_count_query peak_count_ref fragment_standard_deviation fragment_weighted_standard_deviation sim2 annotation_sim2 x_score x_score_dot x_lgamma x_lgamma_dot st_score st_score_dot Peptide Proteins | ||
64 1 189 2 1.00149 0.220921 0.823551 0.220921 0.710924 0.873902 0.444778 0.342503 0.202489 0.304451 0.248828 0.676836 0.61602 -0.000854492 0.000854492 1.30494 11 654.814 1000 142 0.408765 0.421254 0.780238 0.641611 0.0 0.0 560.171 560.034 0.73769 0.727923 X.YGRPPDSHHSR.X Unknown | ||
|
||
|
||
charge similarity bias annotation_similarity annotation_bias avg_bias_adjusted_similarity dot_product delta_dot delta_similarity delta_annotation_similarity delta_sim2 delta_avg dot_contrast_angle similarity_contrast_angle annotation_contrast_angle mass_difference abs_mass_difference ppm_difference peptide_length precursor_mz peak_count_query peak_count_ref fragment_standard_deviation fragment_weighted_standard_deviation sim2 annotation_sim2 x_score x_score_dot x_lgamma x_lgamma_dot st_score st_score_dot m0 | |
-0.0833 -0.2382 -0.1786 2.4159 -0.1786 -0.0354 1.3627 0.3950 -0.4690 -0.1501 0.1838 0.9446 0.0305 -1.3467 -0.4678 -0.0915 -0.0932 -0.4207 0.0715 0.1952 0.0000 1.7078 0.0120 0.0322 -0.7453 0.3660 0.1577 0.1706 0.0408 0.1718 0.3820 0.4089 -0.9830 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.