Skip to content

Commit

Permalink
Add script to standarize reference data sets, add plotting functional…
Browse files Browse the repository at this point in the history
…ity to reproduce the paper data
  • Loading branch information
pm-blanco committed Mar 1, 2024
1 parent d8a67be commit 6573e17
Show file tree
Hide file tree
Showing 10 changed files with 184 additions and 21 deletions.
62 changes: 62 additions & 0 deletions mantainer/standarize_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Import pyMBE and other libraries
import pyMBE
import numpy as np
import pandas as pd
import argparse

# Create an instance of pyMBE library
pmb = pyMBE.pymbe_library()

# Expected inputs
supported_filenames=["Glu-HisMSDE.csv",
"Lys-AspMSDE.csv",
"histatin5_SoftMatter.txt"]

parser = argparse.ArgumentParser(description='Script to standarize the data from various authors')
parser.add_argument('--src_filename',
type=str,
required= True,
help='File name from testsuite/data/src. Currently supported {supported_filenames}')
args = parser.parse_args()

# Inputs
filename=args.src_filename

# Outputs
output_filenames={"Lys-AspMSDE.csv": "Lunkad2021a.csv",
"Glu-HisMSDE.csv": "Lunkad2021b.csv",
"histatin5_SoftMatter.txt": "Blanco2020a.csv"}

# Sanity checks
if filename not in supported_filenames:
ValueError(f"Filename {filename} not supported, supported files are {supported_filenames}")

# Extact the data from Ref.
ref_path=pmb.get_resource(f"testsuite/data/src/{filename}")
Refs_lunkad=["Glu-HisMSDE.csv","Lys-AspMSDE.csv"]
Ref_blanco=["histatin5_SoftMatter.txt"]

if filename in Refs_lunkad:
data=pd.read_csv(ref_path)
Z_ref = 5*-1*data['aaa']+5*data['aab']
# Error propagation calculation
# 1/4 factor added to correct for bug in the original calculation of the error reported by the authors
Z_ref_err = 5/4*np.sqrt((data['eaa'])**2+(data['eab'])**2)

elif filename in Ref_blanco:
data=np.loadtxt(ref_path, delimiter=",")
Z_ref=data[:,1]
Z_ref_err=data[:,2]
else:
raise RuntimeError()

pH_range = np.linspace(2, 12, num=21)

# Store the data
data=pd.DataFrame({"pH": pH_range,
"charge": Z_ref,
"charge_error": Z_ref_err})

data_path=pmb.get_resource(f"testsuite/data")
data.to_csv(f"{data_path}/{output_filenames[filename]}",
index=False)
141 changes: 121 additions & 20 deletions samples/Beyer2024/create_paper_data.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,37 @@
# Load espresso, sugar and other necessary libraries
import sys
# Import pyMBE and other libraries
import pyMBE
from lib import analysis
import os
import inspect
import espressomd
import numpy as np
import pandas as pd
import argparse
from tqdm import tqdm
from espressomd.io.writer import vtf
from espressomd import interactions
from espressomd import electrostatics

# Import pyMBE
import pyMBE
from lib import analysis
from lib import handy_functions as hf

# Create an instance of pyMBE library
pmb = pyMBE.pymbe_library()

valid_fig_labels=["6a", "6b", "6c"]

parser = argparse.ArgumentParser(description='Script to create the data from Beyer2024')
parser.add_argument('--fig_label',
type=str,
required= True,
help='Label of the corresponding figure in Beyer2024, currently supported: 6a, 6b, 6c.')
help=f'Label of the corresponding figure in Beyer2024, currently supported: {valid_fig_labels}')
parser.add_argument('--mode',
type=str,
default= "long-run",
help='sets for how long the simulation runs, valid modes are "short-run" and "long-run"')
help='Sets for how long the simulation runs, valid modes are "short-run" and "long-run"')
parser.add_argument('--plot',
type=bool,
default= False,
help='Switch to activate/deactivate to plot the data"')
args = parser.parse_args()

# Inputs
fig_label=args.fig_label
mode=args.mode
plot=args.plot

# Sanity checks
valid_fig_labels=["6a", "6b", "6c"]

if fig_label not in valid_fig_labels:
raise ValueError(f"The figure label {fig_label} is not supported. Supported figure labels are {valid_fig_labels}")

Expand All @@ -45,7 +40,10 @@
raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}")

## Peptide plots (Fig. 6)
if fig_label in ["6a", "6b", "6c"]:

labels_fig6=["6a", "6b", "6c"]

if fig_label in labels_fig6:
script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py")
if fig_label == "6a":
sequence="K"*5+"D"*5
Expand Down Expand Up @@ -79,16 +77,119 @@
# Get the observables for binning analysis
time_series_data=analysis.read_csv_file(path=f"{time_series_folder_path}/{subitem.name}")
analyzed_data=analysis.block_analyze(full_data=time_series_data)
file_data=pd.concat([file_data,analyzed_data])
data = pd.concat([data,file_data])
data_dict.update(analyzed_data.to_dict())
data = analysis.add_data_to_df(df=data,
data_dict=data_dict,
index=[len(data)])
for param in data_dict.keys():
analyzed_data[param]=data_dict[param]

data_path=pmb.get_resource("samples/Beyer2024/")+"data"
if not os.path.exists(data_path):
os.makedirs(data_path)

data.to_csv(f"{data_path}/fig{fig_label}.csv")

# Plot the data
if plot:

# Import matplotlib for plotting
import matplotlib.pyplot as plt
import matplotlib as mpl

plt.rcParams["font.family"] = "serif"
plt.tight_layout()
mpl.rc('axes', linewidth=1)
mpl.rcParams['lines.markersize'] = 5
mpl.rcParams['lines.linewidth'] = 1.0

width = 10
height = 10
plt.rc('font', size=22)
plt.figure(figsize=(width,height), dpi=100)
plt.grid(which='major',
color='#CCCCCC',
linestyle='--',
linewidth=0.6)

# Set labels for the axes
if fig_label in labels_fig6:
plt.ylabel(r"Net charge $Z/e$")
plt.xlabel(r"pH in the solution")
else:
raise RuntimeError()

# Load pka set
if fig_label in ["6a","6b"]:
pka_path=pmb.get_resource("parameters/pka_sets/CRC1991.txt")
elif fig_label == "6c":
pka_path=pmb.get_resource("parameters/pka_sets/Nozaki1967.txt")
else:
raise RuntimeError()
pmb.load_pka_set (filename=pka_path)

# Load ref data
if fig_label == "6a":
ref_path=pmb.get_resource("testsuite/data/Lunkad2021a.csv")
elif fig_label == "6b":
ref_path=pmb.get_resource("testsuite/data/Lunkad2021b.csv")
elif fig_label == "6c":
ref_path=pmb.get_resource("testsuite/data/Blanco2020a.csv")
else:
raise RuntimeError()

ref_data=analysis.read_csv_file(path=ref_path)

# Calculate Henderson-Hasselbalch (HH
if fig_label in labels_fig6:
pmb.define_peptide (name=sequence,
sequence=sequence,
model="1beadAA")

pH_range_HH = np.linspace(2, 12, num=1000)
Z_HH = pmb.calculate_HH(object_name=sequence,
pH_list=pH_range_HH)

# Plot HH
plt.plot(pH_range_HH,
Z_HH,
label=r"HH",
color="black")

# Plot Ref data
ref_data=ref_data.sort_values(by="pH",ascending=True)
plt.errorbar(ref_data["pH"],
ref_data["charge"],
ref_data["charge_error"],
linestyle="none",
marker="s",
label="Lunkad et al.",
ms=15,
color="C0")

# Style for Blanco linestyle="none", marker="^", label="Blanco et al.", color="green", markeredgewidth=1.5
# Plot data produced by pyMBE

data=data.astype({'pH': 'float'}).sort_values(by="pH")
plt.errorbar(data["pH"],
data["mean","charge"],
yerr=data["err_mean","charge"],
linestyle="none",
marker="o",
label="pyMBE",
color="C1",
fillstyle="none",
ms=15,
markeredgewidth=1.5)

# Save plot
fig_path=pmb.get_resource("samples/Beyer2024")+"/figs"
if not os.path.exists(fig_path):
os.makedirs(fig_path)
plt.legend()
plt.savefig(f"{fig_path}/{fig_label}.png",
bbox_inches='tight')
plt.close()



Expand Down
2 changes: 1 addition & 1 deletion samples/Beyer2024/peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@
time_series=pd.DataFrame(time_series)
filename=analysis.built_output_name(input_dict=inputs)

time_series.to_csv(f"{data_path}/{filename}_time_series.csv")
time_series.to_csv(f"{data_path}/{filename}_time_series.csv", index=False)

if args.verbose:
print("*** DONE ***")
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 6573e17

Please sign in to comment.