diff --git a/app.py b/app.py index 25a32a6..d376194 100644 --- a/app.py +++ b/app.py @@ -3,14 +3,12 @@ import pandas as pd import peptacular.constants import streamlit as st -from peptacular.fragment import build_fragments, Fragment -from peptacular.score import hyper_score, binomial_score, compute_fragment_matches, FragmentMatch +from peptacular.fragment import build_fragments from peptacular.sequence import strip_modifications, get_modifications import plotly.graph_objects as go -import plotly.express as px from constants import * -from utils import color_by_ion_type, COLOR_DICT, is_float, get_fragment_color +from utils import COLOR_DICT, is_float, get_fragment_color # Parse query parameters params = st.query_params @@ -20,18 +18,16 @@ query_mass_type = params.get('mass_type', DEFAULT_MASS_TYPE) query_fragment_types = list(params.get('fragment_types', DEFAULT_FRAGMENT_TYPES)) -query_spectra = params.get('spectra', DEFAULT_SPECTRA) -query_spectra = '\n'.join([f'{pair.split(":")[0]} {pair.split(":")[1]}' for pair in query_spectra.split(';')]) - st.set_page_config(page_title="peptidefragmenter", page_icon=":bomb:", layout="wide") # Sidebar: Peptide Fragmenter input with st.sidebar: st.title('Peptide Fragmenter :bomb:') st.markdown( - """This app takes an amino acid sequence and calculates the fragment ions for a given charge range. - Modifications should be provided in parentheses with the mass difference in Daltons. Terminal modifications - use square brackets.""") + """A simple peptide fragment ion calculator. 
Specify terminal PTMs with [] and internal PTMs with ().""" + ) + + st.markdown('Note that B, X, and Z residues have a mass of 0.0 Da.') peptide_sequence = st.text_input('Peptide Sequence', value=query_peptide_sequence, @@ -47,7 +43,9 @@ # Verify the input sequence is valid unmodified_sequence = strip_modifications(peptide_sequence) - if not all(aa in peptacular.constants.AMINO_ACIDS for aa in unmodified_sequence): + additional_aa = {'B', 'X', 'Z'} + valid_aa = additional_aa.union(peptacular.constants.AMINO_ACIDS) + if not all(aa in valid_aa for aa in unmodified_sequence): st.error(f'Invalid amino acid(s) detected.') st.stop() @@ -81,7 +79,8 @@ mass_type = st.radio(label='Mass Type', options=['monoisotopic', 'average'], help='Mass type to use for fragment calculation', - index=0 if query_mass_type == 'monoisotopic' else 1) + index=0 if query_mass_type == 'monoisotopic' else 1, + horizontal=True) is_monoisotopic = mass_type == 'monoisotopic' fragment_types = st.multiselect(label='Fragment Types', @@ -89,10 +88,6 @@ default=query_fragment_types, help='Fragment types to calculate') - internal_fragments = st.checkbox(label='Internal Fragments', - value=False, - help='Include internal fragments') - def generate_app_url(sequence: str, min_charge: int, max_charge: int, mass_type: str, fragment_types: List[str]): # Generate the app URL @@ -104,8 +99,7 @@ def generate_app_url(sequence: str, min_charge: int, max_charge: int, mass_type: st.write(f'##### [Analysis URL]({url}) (copy me and send to your friends!)') -t1, t2, t3, t4 = st.tabs(['Results', 'Spectra', 'Wiki', 'Help']) - +t1, t3, t4 = st.tabs(['Results', 'Wiki', 'Help']) @st.cache_data def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int], monoisotopic: bool, @@ -114,7 +108,8 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int ion_types=ion_types, charges=charges, monoisotopic=monoisotopic, - internal=internal) + internal=internal, + aa_masses={aa : 0.0 for aa in 
additional_aa},) # convert list of dataclasses to list of dicts frag_df = pd.DataFrame([fragment.__dict__ for fragment in fragments]) @@ -137,34 +132,62 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int ion_types=fragment_types, charges=list(range(min_charge, max_charge + 1)), monoisotopic=is_monoisotopic, - internal=internal_fragments) + internal=False) frag_df_downloaded = frag_df.to_csv(index=False) -# make a plotly plot that will graph the segments end -> start on the y-axis, and mass on the x-axis + traces = [] +seen = set() for idx, row in frag_df[frag_df['internal'] == False].iterrows(): - traces.append( - go.Scatter( + + # Determine the Scatter object based on the condition + if row['ion_type'] in 'abc': + scatter = go.Scatter( x=[row['mz'], row['mz']], y=[row['start'], row['end']], mode='lines', line=dict(color=get_fragment_color(row)), - name=row['label'], + name=row['ion_type'], + legendgroup=row['ion_type'], + showlegend=row['ion_type'] not in seen + + ) + else: + scatter = go.Scatter( + x=[row['mz'], row['mz']], + y=[row['start']+1, row['end']+1], + mode='lines', + line=dict(color=get_fragment_color(row)), + name=row['ion_type'], + legendgroup=row['ion_type'], + showlegend=row['ion_type'] not in seen ) - ) -# Create layout for the plot + seen.add(row['ion_type']) + + # Append the Scatter object to the traces list + traces.append(scatter) + +# Assuming traces is a list of go.Scatter objects +min_x = min(trace['x'][0] for trace in traces) # Find the smallest x-value +max_x = max(trace['x'][1] for trace in traces) # Find the largest x-value + +# Expand the x-axis range a bit +padding = (max_x - min_x) * 0.01 # 1% padding on each side +x_range = [min_x - padding, max_x + padding] + +# Create layout for the plot with updated x-axis range layout = go.Layout( title="Fragment Segments", - xaxis=dict(title='M/Z'), + xaxis=dict(title='M/Z', range=x_range), yaxis=dict(title='Sequence'), - showlegend=False + showlegend=True ) # 
Create a Figure and add the traces fig = go.Figure(data=traces, layout=layout) -fig.update_yaxes(ticktext=list(unmodified_sequence), tickvals=list(range(len(unmodified_sequence)))) +fig.update_yaxes(ticktext=['N-Term']+list(unmodified_sequence)+['C-Term'], tickvals=list(range(len(unmodified_sequence)+2))) dfs = [] combined_data = {'AA': list(unmodified_sequence)} @@ -186,11 +209,25 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int # Displaying the table df = pd.DataFrame(data) - df['# (abc)'] = list(range(1, len(df) + 1)) - df['# (xyz)'] = list(range(1, len(df) + 1))[::-1] + df['+#'] = list(range(1, len(df) + 1)) + df['-#'] = list(range(1, len(df) + 1))[::-1] + + # reorder columns so that # is first # +1 is last and AA is in the middle + combined_cols = df.columns.tolist() + combined_cols.remove('+#') + combined_cols.remove('-#') + combined_cols.remove('AA') + forward_cols = [col for col in combined_cols if 'a' in col or 'b' in col or 'c' in col] + reverse_cols = [col for col in combined_cols if 'x' in col or 'y' in col or 'z' in col] + + # sort + forward_cols.sort() + reverse_cols.sort(reverse=True) + + new_cols = ['+#'] + forward_cols + ['AA'] + reverse_cols + ['-#'] + df = df[new_cols] # reorder columns so that # is first # +1 is last and AA is in the middle - df = df[['AA'] + ['# (abc)'] + [col for col in df.columns if col not in ['AA', '# (abc)', '# (xyz)']] + ['# (xyz)']] dfs.append(df) combined_df = pd.DataFrame(combined_data) @@ -199,200 +236,40 @@ def create_fragment_table(sequence: str, ion_types: List[str], charges: List[int styled_dfs = [] -for df in dfs: - styled_df = df.style.apply(color_by_ion_type) - - # Set table styles with increased horizontal padding for more space between columns, - # centered text, and no borders - styles = [ - dict(selector="td", props=[("padding", "2px 2px"), ("text-align", "center"), ("border", "none")]), - dict(selector="th", props=[("padding", "2px 2px"), ("text-align", "center"), 
("border", "none")]) - ] - styled_df = styled_df.set_table_styles(styles) - styled_dfs.append(styled_df) +def highlight_cells(data): + # Initialize empty DataFrame with same index and columns as original + styled = pd.DataFrame('', index=data.index, columns=data.columns) + + # Iterate over cells and update `styled` based on cell position + for row in data.index: + for col in data.columns: + if col == 'AA' or col == '+#' or col == '-#': + styled.loc[ + row, col] = f'background-color: gainsboro; color: black; text-align: center; font-weight: bold;' + continue -# CSS to inject contained in a string -hide_table_row_index_and_adjust_padding = """ - - """ + styled.loc[ + row, col] = f'color: {COLOR_DICT[col]}; text-align: center;' -# Inject CSS with Markdown -st.markdown(hide_table_row_index_and_adjust_padding, unsafe_allow_html=True) + return styled -with t1: - st.header('Fragment Ions') +for df in dfs: + styled_df = df.style.format(precision=4).apply(highlight_cells, axis=None) + styled_dfs.append(styled_df) +with t1: for styled_df, charge in zip(styled_dfs, list(range(min_charge, max_charge + 1))): st.subheader(f'Charge {charge}') - st.table(styled_df) + st.dataframe(styled_df, height=(len(dfs[0]) + 1) * 35 + 3, hide_index=True) st.plotly_chart(fig, use_container_width=True) - with st.expander('Fragment Ion Data'): - st.dataframe(frag_df, use_container_width=True) - st.download_button(label='Download CSV', data=frag_df_downloaded, file_name='fragment_ions.csv', - use_container_width=True) - -with t2: - st.header('Input Spectra') - st.caption('Add spectra to match fragment ions to. One per line. 
Format: {m/z} {intensity}') + frag_df.drop(columns=['parent_number', 'isotope', 'loss', 'aa_masses', 'parent_sequence', 'internal'], inplace=True) - c1, c2 = st.columns(2) - tolerance_type = c2.radio(label='Tolerance Type', - options=TOLERANCE_OPTIONS, - index=DEFAULT_TOLERANCE_TYPE_INDEX, - help='Offset type to add to spectra') - - tolerance = c1.number_input(label='Tolerance', - value=DEFAULT_TOLERANCE_TH if tolerance_type == 'th' else DEFAULT_TOLERANCE_PPM, - step=TOLERANCE_STEP_TH if tolerance_type == 'th' else TOLERANCE_STEP_PPM, - min_value=MIN_TOLERANCE_VALUE, - max_value=MAX_TOLERANCE_VALUE_TH if tolerance_type == 'th' else MAX_TOLERANCE_VALUE_PPM, - help='Tolerance to use when matching fragment ions to spectra') - - min_intensity = st.number_input(label='Min Intensity', - value=DEFAULT_MIN_INTENSITY, - step=1.0, - min_value=0.0) - spectra = st.text_area(label='Spectra', - value=query_spectra, - help='Spectra to match fragment ions to. One per line. Format: {m/z} {intensity}\\n', - max_chars=30_000) - - if spectra: - - mz_values, intensity_values = [], [] - - for line in spectra.split('\n'): - mz, intensity = line.split(' ') - mz = float(mz) - intensity = float(intensity) - - if intensity <= min_intensity: - continue + st.dataframe(frag_df, use_container_width=True) - mz_values.append(mz) - intensity_values.append(intensity) - - max_spectra_mz = max(mz_values) - - fragment_matches = compute_fragment_matches(fragments, mz_values, intensity_values, tolerance, tolerance_type) - fragment_matches.sort(key=lambda x: abs(x.error), reverse=True) - fragment_matches = {fm.mz: fm for fm in fragment_matches} # keep the best error for each fragment +with t3: + st.markdown(WIKI) - data = [] - - for mz, i in zip(mz_values, intensity_values): - fm = fragment_matches.get(mz, None) - - if fm: - data.append( - {'sequence': fm.fragment.sequence, 'charge': fm.fragment.charge, 'ion_type': fm.fragment.ion_type, - 'number': fm.fragment.number, 'internal': fm.fragment.internal, - 
'parent_number': fm.fragment.parent_number, 'monoisotopic': fm.fragment.monoisotopic, 'mz': mz, - 'intensity': i, 'error': fm.error, 'abs_error': abs(fm.error)}) - - else: - data.append({'sequence': '', 'charge': 0, 'ion_type': '', 'number': 0, 'internal': False, - 'parent_number': 0, 'monoisotopic': True, 'mz': mz, - 'intensity': i, 'error': 0, 'abs_error': 0}) - - spectra_df = pd.DataFrame(data) - - # for keep only the lowest abs_error for ion_type, charge, num - spectra_df.sort_values(by='abs_error', inplace=True) - - spectra_df['ion_color_type'] = spectra_df['ion_type'] - spectra_df.loc[spectra_df['internal'] == True, 'ion_color_type'] = 'i' - - ion_labels = [] - for _, row in spectra_df.iterrows(): - - try: - charge_str = '+' * int(row['charge']) - ion_type_str = row['ion_type'] - parent_number_str = str(int(row['parent_number'])) - except ValueError: - charge_str = '' - ion_type_str = '' - parent_number_str = '' - - ion_labels.append(f"{charge_str}{ion_type_str}{parent_number_str}") - - spectra_df['ion_label'] = ion_labels - spectra_df.loc[spectra_df['internal'] == True, 'ion_label'] += 'i' - - COLOR_DICT.setdefault('', 'grey') - fig = px.bar(spectra_df, x='mz', y='intensity', color='ion_color_type', - hover_data=['charge', 'error', 'sequence'], - color_discrete_map=COLOR_DICT) - fig.update_layout(title='Spectra Plot', xaxis_title='M/Z', yaxis_title='Intensity') - - for i, row in spectra_df.iterrows(): - if row['ion_type']: - fig.add_annotation( - x=row['mz'], - y=row['intensity'], - text=row['ion_label'], - showarrow=False, - yshift=10, - font=dict( - size=13, - color=COLOR_DICT[row['ion_color_type']] - ), - ) - - st.plotly_chart(fig, use_container_width=True) - - spectra_df.sort_values(by='mz', inplace=True) - - st.caption('Score are under development and may not be accurate') - hs = hyper_score(fragments, spectra_df['mz'].tolist(), spectra_df['intensity'].tolist(), tolerance, - tolerance_type) - st.metric(f'Hyperscore', hs) - bs = binomial_score(fragments, 
spectra_df['mz'].tolist(), spectra_df['intensity'].tolist(), tolerance, - tolerance_type) - st.metric(f'Binomial Score', bs) - - - def highlight_cells(data): - # Initialize empty DataFrame with same index and columns as original - styled = pd.DataFrame('', index=data.index, columns=data.columns) - - # Iterate over cells and update `styled` based on cell position - for row in data.index: - for col in data.columns: - if col == 'AA': - continue - label = '+' * int(col[1:]) + col[0] + str(row + 1) - if label in accepted_normal_ions: - styled.loc[row, col] = 'background-color: yellow' - elif label + 'i' in accepted_internal_ions: - styled.loc[row, col] = 'background-color: magenta' - - return styled - - - matched_ions = spectra_df[spectra_df['ion_type'] != ''] - accepted_normal_ions = matched_ions[matched_ions['internal'] == False]['ion_label'].tolist() - accepted_internal_ions = matched_ions[matched_ions['internal'] == True]['ion_label'].tolist() - - combined_df = combined_df.style.apply(highlight_cells, axis=None) - st.table(combined_df) - - with st.expander('Spectra Data'): - spectra_df.sort_values(by=['mz'], inplace=True) - st.dataframe(spectra_df) - st.download_button(label='Download CSV', data=spectra_df.to_csv(index=False), - file_name='spectra_results.csv', - use_container_width=True) - - with t3: - st.markdown(WIKI) - - with t4: - st.markdown(HELP) +with t4: + st.markdown(HELP) diff --git a/constants.py b/constants.py index 89cd188..afe1bd5 100644 --- a/constants.py +++ b/constants.py @@ -21,9 +21,9 @@ def get_env_str(var_name, default): DEFAULT_FRAGMENT_TYPES = 'by' MIN_PEPTIDE_CHARGE = 1 -MAX_PEPTIDE_CHARGE = get_env_int('MAX_PEPTIDE_CHARGE', 5) -MAX_PEPTIDE_AA_COUNT = get_env_int('MAX_PEPTIDE_AA_COUNT', 50) -MAX_PEPTIDE_LENGTH = get_env_int('MAX_PEPTIDE_LENGTH', 1000) +MAX_PEPTIDE_CHARGE = get_env_int('MAX_PEPTIDE_CHARGE', 20) +MAX_PEPTIDE_AA_COUNT = get_env_int('MAX_PEPTIDE_AA_COUNT', 150) +MAX_PEPTIDE_LENGTH = get_env_int('MAX_PEPTIDE_LENGTH', 2000) # 
SPECTRA PARAMS DEFAULT_TOLERANCE_TH = 0.5 @@ -50,7 +50,7 @@ def get_env_str(var_name, default): ## Introduction -Peptide fragmentation refers to the process by which peptides (short chains of amino acid monomers linked by peptide bonds) are broken into smaller fragments. This occurs during mass spectrometry analysis to provide useful information about the peptide's sequence, structure, and identity. The process uses methods such as collision-induced dissociation (CID), electron-transfer dissociation (ETD), or higher-energy collisional dissociation (HCD). +Peptide fragmentation refers to the process by which peptides (short chains of amino acid monomers linked by peptide bonds) are broken into smaller fragments. This occurs during mass spectrometry analysis to provide useful information about the peptide's sequence and structure. The process uses methods such as collision-induced dissociation (CID), electron-transfer dissociation (ETD), or higher-energy collisional dissociation (HCD). ## Basic Concept @@ -62,12 +62,11 @@ def get_env_str(var_name, default): When peptides are subjected to fragmentation in mass spectrometry, the peptide bonds are targeted. The energy imparted to the molecule causes these bonds to break, creating different fragment ions. These fragments are typically classified into six types based on the location of the bond break: a, b, and c ions are the N-terminal fragments, and x, y, and z ions are the C-terminal fragments. - ### Example abcxyz Fragment Ions For example, consider the following peptide sequence: PEPTIDE. It can prodice the the following theoretical fragment ion sequences. 
``` -a/b/c ions (start from the front, just like the alphabet) +a/b/c ions start from the front (just like the alphabet) 1 - P 2 - PE 3 - PEP @@ -76,7 +75,7 @@ def get_env_str(var_name, default): 6 - PEPTID 7 - PEPTIDE -x/y/z ions (start from the back, just like the alphabet) +while x/y/z ions start from the back 1 - E 2 - DE 3 - IDE @@ -106,9 +105,7 @@ def get_env_str(var_name, default): ## Analysis -The resulting fragment ions are then analyzed by the mass spectrometer. By examining the m/z (mass-to-charge) ratio of the fragment ions, researchers can deduce the amino acid sequence of the original peptide. This process is crucial in protein identification and characterization. - - +The resulting fragment ions are then analyzed by the mass spectrometer. By examining the m/z (mass-to-charge) ratio of the fragment ions, researchers can deduce the amino acid sequence of the original peptide. ## Understanding Internal Fragment Ions @@ -154,19 +151,19 @@ def get_env_str(var_name, default): - **Fragment Types**: Select the types of fragments to calculate: 'a', 'b', 'c', 'x', 'y', 'z'. -- **Internal Fragments**: Check this box if you want to include internal fragments in the calculation. - ## Outputs - **Results Tab**: This tab presents the calculated fragment ions in a table, and a plot that shows the fragment segments in a sequence versus mass plot. You can download the fragment ion data as a CSV file. -- **Spectra Tab**: This tab presents an input form to add spectra to match fragment ions to. The format for spectra is: {m/z} {intensity}, one per line. There is also an option to adjust the offset and its type (Da or ppm), and the minimum intensity. If spectra data is provided, the tab displays a plot with matching ions marked and a table of the spectra data. This data can also be downloaded as a CSV file. - - **Wiki Tab**: This tab presents a wiki page with general information on peptide fragmentation. 
-- **Help Tab**: This tab presents a help page on how to use the application (You're currently here). +- **Help Tab**: This tab presents a help page on how to use the application. If you encounter any issues or have suggestions for improvement, please contact pgarrett@scripps.edu. This is a work in progress and your feedback is greatly appreciated! + +## Some Modifications + +https://web.expasy.org/findmod/findmod_masses.html """ \ No newline at end of file