From 4990f3b8b279c1da9c793546fb6be2a063803b8f Mon Sep 17 00:00:00 2001
From: pgarrett <pgarrett@scripps.edu>
Date: Tue, 30 Jan 2024 17:37:23 -0800
Subject: [PATCH] removed spectra viewer

---
 app.py       | 311 ++++++++++++++++-----------------------------------
 constants.py |  27 ++---
 2 files changed, 106 insertions(+), 232 deletions(-)

diff --git a/app.py b/app.py
index 25a32a6..d376194 100644
--- a/app.py
+++ b/app.py
@@ -3,14 +3,12 @@
 import pandas as pd
 import peptacular.constants
 import streamlit as st
-from peptacular.fragment import build_fragments, Fragment
-from peptacular.score import hyper_score, binomial_score, compute_fragment_matches, FragmentMatch
+from peptacular.fragment import build_fragments
 from peptacular.sequence import strip_modifications, get_modifications
 import plotly.graph_objects as go
-import plotly.express as px
 
 from constants import *
-from utils import color_by_ion_type, COLOR_DICT, is_float, get_fragment_color
+from utils import COLOR_DICT, is_float, get_fragment_color
 
 # Parse query parameters
 params = st.query_params
@@ -20,18 +18,16 @@
 query_mass_type = params.get('mass_type', DEFAULT_MASS_TYPE)
 query_fragment_types = list(params.get('fragment_types', DEFAULT_FRAGMENT_TYPES))
 
-query_spectra = params.get('spectra', DEFAULT_SPECTRA)
-query_spectra = '\n'.join([f'{pair.split(":")[0]} {pair.split(":")[1]}' for pair in query_spectra.split(';')])
-
 st.set_page_config(page_title="peptidefragmenter", page_icon=":bomb:", layout="wide")
 
 # Sidebar: Peptide Fragmenter input
 with st.sidebar:
     st.title('Peptide Fragmenter :bomb:')
     st.markdown(
-        """This app takes an amino acid sequence and calculates the fragment ions for a given charge range. 
-        Modifications should be provided in parentheses with the mass difference in Daltons. Terminal modifications 
-        use square brackets.""")
+        """A simple peptide fragment ion calculator. Specify terminal PTMs with [] and internal PTMs with ()."""
+    )
+
+    st.markdown('Note that B, X, and Z residues have a mass of 0.0 Da.')
 
     peptide_sequence = st.text_input('Peptide Sequence',
                                      value=query_peptide_sequence,
@@ -47,7 +43,9 @@
 
     # Verify the input sequence is valid
     unmodified_sequence = strip_modifications(peptide_sequence)
-    if not all(aa in peptacular.constants.AMINO_ACIDS for aa in unmodified_sequence):
+    additional_aa = {'B', 'X', 'Z'}  # extra residues accepted on top of the standard set (built with 0.0 Da mass)
+    valid_aa = additional_aa.union(peptacular.constants.AMINO_ACIDS)
+    if not all(aa in valid_aa for aa in unmodified_sequence):  # membership test per residue, not set truthiness
         st.error(f'Invalid amino acid(s) detected.')
         st.stop()
 
@@ -81,7 +79,8 @@
     mass_type = st.radio(label='Mass Type',
                          options=['monoisotopic', 'average'],
                          help='Mass type to use for fragment calculation',
-                         index=0 if query_mass_type == 'monoisotopic' else 1)
+                         index=0 if query_mass_type == 'monoisotopic' else 1,
+                         horizontal=True)
     is_monoisotopic = mass_type == 'monoisotopic'
 
     fragment_types = st.multiselect(label='Fragment Types',
@@ -89,10 +88,6 @@
                                     default=query_fragment_types,
                                     help='Fragment types to calculate')
 
-    internal_fragments = st.checkbox(label='Internal Fragments',
-                                     value=False,
-                                     help='Include internal fragments')
-
 
 def generate_app_url(sequence: str, min_charge: int, max_charge: int, mass_type: str, fragment_types: List[str]):
     # Generate the app URL
@@ -104,8 +99,7 @@ def generate_app_url(sequence: str, min_charge: int, max_charge: int, mass_type:
 
 st.write(f'##### [Analysis URL]({url}) (copy me and send to your friends!)')
 
-t1, t2, t3, t4 = st.tabs(['Results', 'Spectra', 'Wiki', 'Help'])
-
+t1, t3, t4 = st.tabs(['Results', 'Wiki', 'Help'])
 
 @st.cache_data
 def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int], monoisotopic: bool,
@@ -114,7 +108,8 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int
                                 ion_types=ion_types,
                                 charges=charges,
                                 monoisotopic=monoisotopic,
-                                internal=internal)
+                                internal=internal,
+                                aa_masses={aa : 0.0 for aa in additional_aa},)
 
     # convert list of dataclasses to list of dicts
     frag_df = pd.DataFrame([fragment.__dict__ for fragment in fragments])
@@ -137,34 +132,62 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int
                                            ion_types=fragment_types,
                                            charges=list(range(min_charge, max_charge + 1)),
                                            monoisotopic=is_monoisotopic,
-                                           internal=internal_fragments)
+                                           internal=False)
 
 frag_df_downloaded = frag_df.to_csv(index=False)
 
-# make a plotly plot that will graph the segments end -> start on the y-axis, and mass on the x-axis
+
 traces = []
+seen = set()
 for idx, row in frag_df[frag_df['internal'] == False].iterrows():
-    traces.append(
-        go.Scatter(
+
+    # Determine the Scatter object based on the condition
+    if row['ion_type'] in 'abc':
+        scatter = go.Scatter(
             x=[row['mz'], row['mz']],
             y=[row['start'], row['end']],
             mode='lines',
             line=dict(color=get_fragment_color(row)),
-            name=row['label'],
+            name=row['ion_type'],
+            legendgroup=row['ion_type'],
+            showlegend=row['ion_type'] not in seen
+
+        )
+    else:
+        scatter = go.Scatter(
+            x=[row['mz'], row['mz']],
+            y=[row['start']+1, row['end']+1],
+            mode='lines',
+            line=dict(color=get_fragment_color(row)),
+            name=row['ion_type'],
+            legendgroup=row['ion_type'],
+            showlegend=row['ion_type'] not in seen
         )
-    )
 
-# Create layout for the plot
+    seen.add(row['ion_type'])
+
+    # Append the Scatter object to the traces list
+    traces.append(scatter)
+
+# Assuming traces is a list of go.Scatter objects
+min_x = min(trace['x'][0] for trace in traces)  # Find the smallest x-value
+max_x = max(trace['x'][1] for trace in traces)  # Find the largest x-value
+
+# Expand the x-axis range a bit
+padding = (max_x - min_x) * 0.01  # 1% padding on each side
+x_range = [min_x - padding, max_x + padding]
+
+# Create layout for the plot with updated x-axis range
 layout = go.Layout(
     title="Fragment Segments",
-    xaxis=dict(title='M/Z'),
+    xaxis=dict(title='M/Z', range=x_range),
     yaxis=dict(title='Sequence'),
-    showlegend=False
+    showlegend=True
 )
 
 # Create a Figure and add the traces
 fig = go.Figure(data=traces, layout=layout)
-fig.update_yaxes(ticktext=list(unmodified_sequence), tickvals=list(range(len(unmodified_sequence))))
+fig.update_yaxes(ticktext=['N-Term']+list(unmodified_sequence)+['C-Term'], tickvals=list(range(len(unmodified_sequence)+2)))
 
 dfs = []
 combined_data = {'AA': list(unmodified_sequence)}
@@ -186,11 +209,25 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int
 
     # Displaying the table
     df = pd.DataFrame(data)
-    df['# (abc)'] = list(range(1, len(df) + 1))
-    df['# (xyz)'] = list(range(1, len(df) + 1))[::-1]
+    df['+#'] = list(range(1, len(df) + 1))
+    df['-#'] = list(range(1, len(df) + 1))[::-1]
+
+    # reorder columns: '+#' first, forward (a/b/c) ions, 'AA' in the middle, reverse (x/y/z) ions, '-#' last
+    combined_cols = df.columns.tolist()
+    combined_cols.remove('+#')
+    combined_cols.remove('-#')
+    combined_cols.remove('AA')
+    forward_cols = [col for col in combined_cols if 'a' in col or 'b' in col or 'c' in col]
+    reverse_cols = [col for col in combined_cols if 'x' in col or 'y' in col or 'z' in col]
+
+    # sort
+    forward_cols.sort()
+    reverse_cols.sort(reverse=True)
+
+    new_cols = ['+#'] + forward_cols + ['AA'] + reverse_cols + ['-#']
+    df = df[new_cols]
 
     # reorder columns so that # is first # +1 is last and AA is in the middle
-    df = df[['AA'] + ['# (abc)'] + [col for col in df.columns if col not in ['AA', '# (abc)', '# (xyz)']] + ['# (xyz)']]
     dfs.append(df)
 
 combined_df = pd.DataFrame(combined_data)
@@ -199,200 +236,40 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int
 
 styled_dfs = []
 
-for df in dfs:
-    styled_df = df.style.apply(color_by_ion_type)
-
-    # Set table styles with increased horizontal padding for more space between columns,
-    # centered text, and no borders
-    styles = [
-        dict(selector="td", props=[("padding", "2px 2px"), ("text-align", "center"), ("border", "none")]),
-        dict(selector="th", props=[("padding", "2px 2px"), ("text-align", "center"), ("border", "none")])
-    ]
-    styled_df = styled_df.set_table_styles(styles)
-    styled_dfs.append(styled_df)
+def highlight_cells(data):
+    # Initialize empty DataFrame with same index and columns as original
+    styled = pd.DataFrame('', index=data.index, columns=data.columns)
+
+    # Iterate over cells and update `styled` based on cell position
+    for row in data.index:
+        for col in data.columns:
+            if col == 'AA' or col == '+#' or col == '-#':
+                styled.loc[
+                    row, col] = f'background-color: gainsboro; color: black; text-align: center; font-weight: bold;'
+                continue
 
-# CSS to inject contained in a string
-hide_table_row_index_and_adjust_padding = """
-            <style>
-            thead tr th:first-child {display:none}
-            tbody th {display:none}
-            td, th {padding: 0px}  /* Padding adjustment for all table cells */
-            </style>
-            """
+            styled.loc[
+                row, col] = f'color: {COLOR_DICT[col]}; text-align: center;'
 
-# Inject CSS with Markdown
-st.markdown(hide_table_row_index_and_adjust_padding, unsafe_allow_html=True)
+    return styled
 
-with t1:
-    st.header('Fragment Ions')
+for df in dfs:
+    styled_df = df.style.format(precision=4).apply(highlight_cells, axis=None)
+    styled_dfs.append(styled_df)
 
+with t1:
     for styled_df, charge in zip(styled_dfs, list(range(min_charge, max_charge + 1))):
         st.subheader(f'Charge {charge}')
-        st.table(styled_df)
+        st.dataframe(styled_df, height=(len(dfs[0]) + 1) * 35 + 3, hide_index=True)
 
     st.plotly_chart(fig, use_container_width=True)
 
-    with st.expander('Fragment Ion Data'):
-        st.dataframe(frag_df, use_container_width=True)
-        st.download_button(label='Download CSV', data=frag_df_downloaded, file_name='fragment_ions.csv',
-                           use_container_width=True)
-
-with t2:
-    st.header('Input Spectra')
-    st.caption('Add spectra to match fragment ions to. One per line. Format: {m/z} {intensity}')
+    frag_df.drop(columns=['parent_number', 'isotope', 'loss', 'aa_masses', 'parent_sequence', 'internal'], inplace=True)
 
-    c1, c2 = st.columns(2)
-    tolerance_type = c2.radio(label='Tolerance Type',
-                              options=TOLERANCE_OPTIONS,
-                              index=DEFAULT_TOLERANCE_TYPE_INDEX,
-                              help='Offset type to add to spectra')
-
-    tolerance = c1.number_input(label='Tolerance',
-                                value=DEFAULT_TOLERANCE_TH if tolerance_type == 'th' else DEFAULT_TOLERANCE_PPM,
-                                step=TOLERANCE_STEP_TH if tolerance_type == 'th' else TOLERANCE_STEP_PPM,
-                                min_value=MIN_TOLERANCE_VALUE,
-                                max_value=MAX_TOLERANCE_VALUE_TH if tolerance_type == 'th' else MAX_TOLERANCE_VALUE_PPM,
-                                help='Tolerance to use when matching fragment ions to spectra')
-
-    min_intensity = st.number_input(label='Min Intensity',
-                                    value=DEFAULT_MIN_INTENSITY,
-                                    step=1.0,
-                                    min_value=0.0)
-    spectra = st.text_area(label='Spectra',
-                           value=query_spectra,
-                           help='Spectra to match fragment ions to. One per line. Format: {m/z} {intensity}\\n',
-                           max_chars=30_000)
-
-    if spectra:
-
-        mz_values, intensity_values = [], []
-
-        for line in spectra.split('\n'):
-            mz, intensity = line.split(' ')
-            mz = float(mz)
-            intensity = float(intensity)
-
-            if intensity <= min_intensity:
-                continue
+    st.dataframe(frag_df, use_container_width=True)
 
-            mz_values.append(mz)
-            intensity_values.append(intensity)
-
-        max_spectra_mz = max(mz_values)
-
-        fragment_matches = compute_fragment_matches(fragments, mz_values, intensity_values, tolerance, tolerance_type)
-        fragment_matches.sort(key=lambda x: abs(x.error), reverse=True)
-        fragment_matches = {fm.mz: fm for fm in fragment_matches}  # keep the best error for each fragment
+with t3:
+    st.markdown(WIKI)
 
-        data = []
-
-        for mz, i in zip(mz_values, intensity_values):
-            fm = fragment_matches.get(mz, None)
-
-            if fm:
-                data.append(
-                    {'sequence': fm.fragment.sequence, 'charge': fm.fragment.charge, 'ion_type': fm.fragment.ion_type,
-                     'number': fm.fragment.number, 'internal': fm.fragment.internal,
-                     'parent_number': fm.fragment.parent_number, 'monoisotopic': fm.fragment.monoisotopic, 'mz': mz,
-                     'intensity': i, 'error': fm.error, 'abs_error': abs(fm.error)})
-
-            else:
-                data.append({'sequence': '', 'charge': 0, 'ion_type': '', 'number': 0, 'internal': False,
-                             'parent_number': 0, 'monoisotopic': True, 'mz': mz,
-                             'intensity': i, 'error': 0, 'abs_error': 0})
-
-        spectra_df = pd.DataFrame(data)
-
-        # for keep only the lowest abs_error for ion_type, charge, num
-        spectra_df.sort_values(by='abs_error', inplace=True)
-
-        spectra_df['ion_color_type'] = spectra_df['ion_type']
-        spectra_df.loc[spectra_df['internal'] == True, 'ion_color_type'] = 'i'
-
-        ion_labels = []
-        for _, row in spectra_df.iterrows():
-
-            try:
-                charge_str = '+' * int(row['charge'])
-                ion_type_str = row['ion_type']
-                parent_number_str = str(int(row['parent_number']))
-            except ValueError:
-                charge_str = ''
-                ion_type_str = ''
-                parent_number_str = ''
-
-            ion_labels.append(f"{charge_str}{ion_type_str}{parent_number_str}")
-
-        spectra_df['ion_label'] = ion_labels
-        spectra_df.loc[spectra_df['internal'] == True, 'ion_label'] += 'i'
-
-        COLOR_DICT.setdefault('', 'grey')
-        fig = px.bar(spectra_df, x='mz', y='intensity', color='ion_color_type',
-                     hover_data=['charge', 'error', 'sequence'],
-                     color_discrete_map=COLOR_DICT)
-        fig.update_layout(title='Spectra Plot', xaxis_title='M/Z', yaxis_title='Intensity')
-
-        for i, row in spectra_df.iterrows():
-            if row['ion_type']:
-                fig.add_annotation(
-                    x=row['mz'],
-                    y=row['intensity'],
-                    text=row['ion_label'],
-                    showarrow=False,
-                    yshift=10,
-                    font=dict(
-                        size=13,
-                        color=COLOR_DICT[row['ion_color_type']]
-                    ),
-                )
-
-        st.plotly_chart(fig, use_container_width=True)
-
-        spectra_df.sort_values(by='mz', inplace=True)
-
-        st.caption('Score are under development and may not be accurate')
-        hs = hyper_score(fragments, spectra_df['mz'].tolist(), spectra_df['intensity'].tolist(), tolerance,
-                         tolerance_type)
-        st.metric(f'Hyperscore', hs)
-        bs = binomial_score(fragments, spectra_df['mz'].tolist(), spectra_df['intensity'].tolist(), tolerance,
-                            tolerance_type)
-        st.metric(f'Binomial Score', bs)
-
-
-        def highlight_cells(data):
-            # Initialize empty DataFrame with same index and columns as original
-            styled = pd.DataFrame('', index=data.index, columns=data.columns)
-
-            # Iterate over cells and update `styled` based on cell position
-            for row in data.index:
-                for col in data.columns:
-                    if col == 'AA':
-                        continue
-                    label = '+' * int(col[1:]) + col[0] + str(row + 1)
-                    if label in accepted_normal_ions:
-                        styled.loc[row, col] = 'background-color: yellow'
-                    elif label + 'i' in accepted_internal_ions:
-                        styled.loc[row, col] = 'background-color: magenta'
-
-            return styled
-
-
-        matched_ions = spectra_df[spectra_df['ion_type'] != '']
-        accepted_normal_ions = matched_ions[matched_ions['internal'] == False]['ion_label'].tolist()
-        accepted_internal_ions = matched_ions[matched_ions['internal'] == True]['ion_label'].tolist()
-
-        combined_df = combined_df.style.apply(highlight_cells, axis=None)
-        st.table(combined_df)
-
-        with st.expander('Spectra Data'):
-            spectra_df.sort_values(by=['mz'], inplace=True)
-            st.dataframe(spectra_df)
-            st.download_button(label='Download CSV', data=spectra_df.to_csv(index=False),
-                               file_name='spectra_results.csv',
-                               use_container_width=True)
-
-    with t3:
-        st.markdown(WIKI)
-
-    with t4:
-        st.markdown(HELP)
+with t4:
+    st.markdown(HELP)
diff --git a/constants.py b/constants.py
index 89cd188..afe1bd5 100644
--- a/constants.py
+++ b/constants.py
@@ -21,9 +21,9 @@ def get_env_str(var_name, default):
 DEFAULT_FRAGMENT_TYPES = 'by'
 
 MIN_PEPTIDE_CHARGE = 1
-MAX_PEPTIDE_CHARGE = get_env_int('MAX_PEPTIDE_CHARGE', 5)
-MAX_PEPTIDE_AA_COUNT = get_env_int('MAX_PEPTIDE_AA_COUNT', 50)
-MAX_PEPTIDE_LENGTH = get_env_int('MAX_PEPTIDE_LENGTH', 1000)
+MAX_PEPTIDE_CHARGE = get_env_int('MAX_PEPTIDE_CHARGE', 20)
+MAX_PEPTIDE_AA_COUNT = get_env_int('MAX_PEPTIDE_AA_COUNT', 150)
+MAX_PEPTIDE_LENGTH = get_env_int('MAX_PEPTIDE_LENGTH', 2000)
 
 # SPECTRA PARAMS
 DEFAULT_TOLERANCE_TH = 0.5
@@ -50,7 +50,7 @@ def get_env_str(var_name, default):
 
 ## Introduction
 
-Peptide fragmentation refers to the process by which peptides (short chains of amino acid monomers linked by peptide bonds) are broken into smaller fragments. This occurs during mass spectrometry analysis to provide useful information about the peptide's sequence, structure, and identity. The process uses methods such as collision-induced dissociation (CID), electron-transfer dissociation (ETD), or higher-energy collisional dissociation (HCD).
+Peptide fragmentation refers to the process by which peptides (short chains of amino acid monomers linked by peptide bonds) are broken into smaller fragments. This occurs during mass spectrometry analysis to provide useful information about the peptide's sequence and structure. The process uses methods such as collision-induced dissociation (CID), electron-transfer dissociation (ETD), or higher-energy collisional dissociation (HCD).
 
 ## Basic Concept
 
@@ -62,12 +62,11 @@ def get_env_str(var_name, default):
 
 When peptides are subjected to fragmentation in mass spectrometry, the peptide bonds are targeted. The energy imparted to the molecule causes these bonds to break, creating different fragment ions. These fragments are typically classified into six types based on the location of the bond break: a, b, and c ions are the N-terminal fragments, and x, y, and z ions are the C-terminal fragments.
 
-
 ### Example abcxyz Fragment Ions
 
 For example, consider the following peptide sequence: PEPTIDE. It can prodice the the following theoretical fragment ion sequences.
 ```
-a/b/c ions (start from the front, just like the alphabet)
+a/b/c ions start from the front (just like the alphabet)
 1 - P
 2 - PE
 3 - PEP
@@ -76,7 +75,7 @@ def get_env_str(var_name, default):
 6 - PEPTID
 7 - PEPTIDE
 
-x/y/z ions (start from the back, just like the alphabet)
+while x/y/z ions start from the back
 1 - E
 2 - DE
 3 - IDE
@@ -106,9 +105,7 @@ def get_env_str(var_name, default):
 
 ## Analysis
 
-The resulting fragment ions are then analyzed by the mass spectrometer. By examining the m/z (mass-to-charge) ratio of the fragment ions, researchers can deduce the amino acid sequence of the original peptide. This process is crucial in protein identification and characterization.
-
-
+The resulting fragment ions are then analyzed by the mass spectrometer. By examining the m/z (mass-to-charge) ratio of the fragment ions, researchers can deduce the amino acid sequence of the original peptide. 
 
 ## Understanding Internal Fragment Ions
 
@@ -154,19 +151,19 @@ def get_env_str(var_name, default):
 
 - **Fragment Types**: Select the types of fragments to calculate: 'a', 'b', 'c', 'x', 'y', 'z'. 
 
-- **Internal Fragments**: Check this box if you want to include internal fragments in the calculation. 
-
 ## Outputs
 
 - **Results Tab**: This tab presents the calculated fragment ions in a table, and a plot that shows the fragment segments in a sequence versus mass plot. You can download the fragment ion data as a CSV file.
 
-- **Spectra Tab**: This tab presents an input form to add spectra to match fragment ions to. The format for spectra is: {m/z} {intensity}, one per line. There is also an option to adjust the offset and its type (Da or ppm), and the minimum intensity. If spectra data is provided, the tab displays a plot with matching ions marked and a table of the spectra data. This data can also be downloaded as a CSV file.
-
 - **Wiki Tab**: This tab presents a wiki page with general information on peptide fragmentation.
 
-- **Help Tab**: This tab presents a help page on how to use the application (You're currently here).
+- **Help Tab**: This tab presents a help page on how to use the application.
 
 If you encounter any issues or have suggestions for improvement, please contact pgarrett@scripps.edu.
 
 This is a work in progress and your feedback is greatly appreciated!
+
+## Some Modifications
+
+https://web.expasy.org/findmod/findmod_masses.html
 """
\ No newline at end of file