From 6573e172fb03296cf0f5b0089e6f1ecb1b17687b Mon Sep 17 00:00:00 2001 From: blancoapa Date: Fri, 1 Mar 2024 18:22:09 +0100 Subject: [PATCH] Add script to standarize reference data sets, add plotting functionality to reproduce the paper data --- mantainer/standarize_data.py | 62 ++++++++ samples/Beyer2024/create_paper_data.py | 141 +++++++++++++++--- samples/Beyer2024/peptide.py | 2 +- testsuite/data/{ => src}/1beb-10mM-torres.dat | 0 testsuite/data/{ => src}/1f6s-10mM-torres.dat | 0 testsuite/data/{ => src}/Glu-HisMSDE.csv | 0 testsuite/data/{ => src}/Lys-AspMSDE.csv | 0 testsuite/data/{ => src}/data_landsgesell.csv | 0 .../{ => src}/excess_chemical_potential.dat | 0 .../data/{ => src}/histatin5_SoftMatter.txt | 0 10 files changed, 184 insertions(+), 21 deletions(-) create mode 100644 mantainer/standarize_data.py rename testsuite/data/{ => src}/1beb-10mM-torres.dat (100%) rename testsuite/data/{ => src}/1f6s-10mM-torres.dat (100%) rename testsuite/data/{ => src}/Glu-HisMSDE.csv (100%) rename testsuite/data/{ => src}/Lys-AspMSDE.csv (100%) rename testsuite/data/{ => src}/data_landsgesell.csv (100%) rename testsuite/data/{ => src}/excess_chemical_potential.dat (100%) rename testsuite/data/{ => src}/histatin5_SoftMatter.txt (100%) diff --git a/mantainer/standarize_data.py b/mantainer/standarize_data.py new file mode 100644 index 0000000..f448abf --- /dev/null +++ b/mantainer/standarize_data.py @@ -0,0 +1,62 @@ +# Import pyMBE and other libraries +import pyMBE +import numpy as np +import pandas as pd +import argparse + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +# Expected inputs +supported_filenames=["Glu-HisMSDE.csv", + "Lys-AspMSDE.csv", + "histatin5_SoftMatter.txt"] + +parser = argparse.ArgumentParser(description='Script to standarize the data from various authors') +parser.add_argument('--src_filename', + type=str, + required= True, + help='File name from testsuite/data/src. Currently supported {supported_filenames}') +args = parser.parse_args() + +# Inputs +filename=args.src_filename + +# Outputs +output_filenames={"Lys-AspMSDE.csv": "Lunkad2021a.csv", + "Glu-HisMSDE.csv": "Lunkad2021b.csv", + "histatin5_SoftMatter.txt": "Blanco2020a.csv"} + +# Sanity checks +if filename not in supported_filenames: + ValueError(f"Filename {filename} not supported, supported files are {supported_filenames}") + +# Extact the data from Ref. +ref_path=pmb.get_resource(f"testsuite/data/src/{filename}") +Refs_lunkad=["Glu-HisMSDE.csv","Lys-AspMSDE.csv"] +Ref_blanco=["histatin5_SoftMatter.txt"] + +if filename in Refs_lunkad: + data=pd.read_csv(ref_path) + Z_ref = 5*-1*data['aaa']+5*data['aab'] + # Error propagation calculation + # 1/4 factor added to correct for bug in the original calculation of the error reported by the authors + Z_ref_err = 5/4*np.sqrt((data['eaa'])**2+(data['eab'])**2) + +elif filename in Ref_blanco: + data=np.loadtxt(ref_path, delimiter=",") + Z_ref=data[:,1] + Z_ref_err=data[:,2] +else: + raise RuntimeError() + +pH_range = np.linspace(2, 12, num=21) + +# Store the data +data=pd.DataFrame({"pH": pH_range, + "charge": Z_ref, + "charge_error": Z_ref_err}) + +data_path=pmb.get_resource(f"testsuite/data") +data.to_csv(f"{data_path}/{output_filenames[filename]}", + index=False) \ No newline at end of file diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py index 28b3991..d0085de 100644 --- a/samples/Beyer2024/create_paper_data.py +++ b/samples/Beyer2024/create_paper_data.py @@ -1,42 +1,37 @@ -# Load espresso, sugar and other necessary libraries -import sys +# Import pyMBE and other libraries +import pyMBE +from lib import analysis import os -import inspect -import espressomd import numpy as np import pandas as pd import argparse -from tqdm import tqdm -from espressomd.io.writer import vtf -from espressomd import interactions -from espressomd import electrostatics - -# Import pyMBE -import pyMBE -from lib import analysis -from lib import handy_functions as hf # Create an instance of pyMBE library pmb = pyMBE.pymbe_library() +valid_fig_labels=["6a", "6b", "6c"] + parser = argparse.ArgumentParser(description='Script to create the data from Beyer2024') parser.add_argument('--fig_label', type=str, required= True, - help='Label of the corresponding figure in Beyer2024, currently supported: 6a, 6b, 6c.') + help=f'Label of the corresponding figure in Beyer2024, currently supported: {valid_fig_labels}') parser.add_argument('--mode', type=str, default= "long-run", - help='sets for how long the simulation runs, valid modes are "short-run" and "long-run"') + help='Sets for how long the simulation runs, valid modes are "short-run" and "long-run"') +parser.add_argument('--plot', + type=bool, + default= False, + help='Switch to activate/deactivate to plot the data"') args = parser.parse_args() # Inputs fig_label=args.fig_label mode=args.mode +plot=args.plot # Sanity checks -valid_fig_labels=["6a", "6b", "6c"] - if fig_label not in valid_fig_labels: raise ValueError(f"The figure label {fig_label} is not supported. Supported figure labels are {valid_fig_labels}") @@ -45,7 +40,10 @@ raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") ## Peptide plots (Fig. 6) -if fig_label in ["6a", "6b", "6c"]: + +labels_fig6=["6a", "6b", "6c"] + +if fig_label in labels_fig6: script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") if fig_label == "6a": sequence="K"*5+"D"*5 @@ -79,8 +77,12 @@ # Get the observables for binning analysis time_series_data=analysis.read_csv_file(path=f"{time_series_folder_path}/{subitem.name}") analyzed_data=analysis.block_analyze(full_data=time_series_data) - file_data=pd.concat([file_data,analyzed_data]) - data = pd.concat([data,file_data]) + data_dict.update(analyzed_data.to_dict()) + data = analysis.add_data_to_df(df=data, + data_dict=data_dict, + index=[len(data)]) + for param in data_dict.keys(): + analyzed_data[param]=data_dict[param] data_path=pmb.get_resource("samples/Beyer2024/")+"data" if not os.path.exists(data_path): @@ -88,7 +90,106 @@ data.to_csv(f"{data_path}/fig{fig_label}.csv") +# Plot the data +if plot: + + # Import matplotlib for plotting + import matplotlib.pyplot as plt + import matplotlib as mpl + + plt.rcParams["font.family"] = "serif" + plt.tight_layout() + mpl.rc('axes', linewidth=1) + mpl.rcParams['lines.markersize'] = 5 + mpl.rcParams['lines.linewidth'] = 1.0 + + width = 10 + height = 10 + plt.rc('font', size=22) + plt.figure(figsize=(width,height), dpi=100) + plt.grid(which='major', + color='#CCCCCC', + linestyle='--', + linewidth=0.6) + + # Set labels for the axes + if fig_label in labels_fig6: + plt.ylabel(r"Net charge $Z/e$") + plt.xlabel(r"pH in the solution") + else: + raise RuntimeError() + # Load pka set + if fig_label in ["6a","6b"]: + pka_path=pmb.get_resource("parameters/pka_sets/CRC1991.txt") + elif fig_label == "6c": + pka_path=pmb.get_resource("parameters/pka_sets/Nozaki1967.txt") + else: + raise RuntimeError() + pmb.load_pka_set (filename=pka_path) + + # Load ref data + if fig_label == "6a": + ref_path=pmb.get_resource("testsuite/data/Lunkad2021a.csv") + elif fig_label == "6b": + ref_path=pmb.get_resource("testsuite/data/Lunkad2021b.csv") + elif fig_label == "6c": + ref_path=pmb.get_resource("testsuite/data/Blanco2020a.csv") + else: + raise RuntimeError() + + ref_data=analysis.read_csv_file(path=ref_path) + + # Calculate Henderson-Hasselbalch (HH + if fig_label in labels_fig6: + pmb.define_peptide (name=sequence, + sequence=sequence, + model="1beadAA") + + pH_range_HH = np.linspace(2, 12, num=1000) + Z_HH = pmb.calculate_HH(object_name=sequence, + pH_list=pH_range_HH) + + # Plot HH + plt.plot(pH_range_HH, + Z_HH, + label=r"HH", + color="black") + + # Plot Ref data + ref_data=ref_data.sort_values(by="pH",ascending=True) + plt.errorbar(ref_data["pH"], + ref_data["charge"], + ref_data["charge_error"], + linestyle="none", + marker="s", + label="Lunkad et al.", + ms=15, + color="C0") + + # Style for Blanco linestyle="none", marker="^", label="Blanco et al.", color="green", markeredgewidth=1.5 + # Plot data produced by pyMBE + + data=data.astype({'pH': 'float'}).sort_values(by="pH") + plt.errorbar(data["pH"], + data["mean","charge"], + yerr=data["err_mean","charge"], + linestyle="none", + marker="o", + label="pyMBE", + color="C1", + fillstyle="none", + ms=15, + markeredgewidth=1.5) + + # Save plot + fig_path=pmb.get_resource("samples/Beyer2024")+"/figs" + if not os.path.exists(fig_path): + os.makedirs(fig_path) + plt.legend() + plt.savefig(f"{fig_path}/{fig_label}.png", + bbox_inches='tight') + plt.close() diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py index 73179b0..24b8d91 100644 --- a/samples/Beyer2024/peptide.py +++ b/samples/Beyer2024/peptide.py @@ -205,7 +205,7 @@ time_series=pd.DataFrame(time_series) filename=analysis.built_output_name(input_dict=inputs) -time_series.to_csv(f"{data_path}/{filename}_time_series.csv") +time_series.to_csv(f"{data_path}/{filename}_time_series.csv", index=False) if args.verbose: print("*** DONE ***") diff --git a/testsuite/data/1beb-10mM-torres.dat b/testsuite/data/src/1beb-10mM-torres.dat similarity index 100% rename from testsuite/data/1beb-10mM-torres.dat rename to testsuite/data/src/1beb-10mM-torres.dat diff --git a/testsuite/data/1f6s-10mM-torres.dat b/testsuite/data/src/1f6s-10mM-torres.dat similarity index 100% rename from testsuite/data/1f6s-10mM-torres.dat rename to testsuite/data/src/1f6s-10mM-torres.dat diff --git a/testsuite/data/Glu-HisMSDE.csv b/testsuite/data/src/Glu-HisMSDE.csv similarity index 100% rename from testsuite/data/Glu-HisMSDE.csv rename to testsuite/data/src/Glu-HisMSDE.csv diff --git a/testsuite/data/Lys-AspMSDE.csv b/testsuite/data/src/Lys-AspMSDE.csv similarity index 100% rename from testsuite/data/Lys-AspMSDE.csv rename to testsuite/data/src/Lys-AspMSDE.csv diff --git a/testsuite/data/data_landsgesell.csv b/testsuite/data/src/data_landsgesell.csv similarity index 100% rename from testsuite/data/data_landsgesell.csv rename to testsuite/data/src/data_landsgesell.csv diff --git a/testsuite/data/excess_chemical_potential.dat b/testsuite/data/src/excess_chemical_potential.dat similarity index 100% rename from testsuite/data/excess_chemical_potential.dat rename to testsuite/data/src/excess_chemical_potential.dat diff --git a/testsuite/data/histatin5_SoftMatter.txt b/testsuite/data/src/histatin5_SoftMatter.txt similarity index 100% rename from testsuite/data/histatin5_SoftMatter.txt rename to testsuite/data/src/histatin5_SoftMatter.txt