Add CI test for all peptide cases
pm-blanco committed Mar 4, 2024
1 parent 7338fa6 commit ab716fa
Showing 9 changed files with 141 additions and 271 deletions.
18 changes: 2 additions & 16 deletions Makefile
@@ -2,7 +2,6 @@
.PHONY: sample
.PHONY: visual
.PHONY: clean
.PHONY: tests
.PHONY: testsuite
.PHONY: docs

@@ -12,7 +11,7 @@ docs:
pdoc ./pyMBE.py -o ./docs --docformat google

testsuite:
${ESPResSo_build_path}/pypresso testsuite/LYS_ASP_peptide.py
${ESPResSo_build_path}/pypresso testsuite/peptide_tests.py

sample:
${ESPResSo_build_path}/pypresso sample_scripts/peptide_simulation_example.py
@@ -22,17 +21,4 @@ visual:
vmd -e visualization.tcl

tutorial:
${ESPResSo_build_path}/ipypresso notebook sugar_tutorial.ipynb

tests_peptide:
${ESPResSo_build_path}/pypresso tests/LYS_ASP_peptide.py
${ESPResSo_build_path}/pypresso tests/GLU_HIS_peptide.py
${ESPResSo_build_path}/pypresso tests/histatin5_peptide.py

tests_globular_protein:
python3 tests/run_test_protein.py --pdb_code 1beb --run_command "${ESPResSo_build_path}/pypresso sample_scripts/globular_protein.py --pdb 1beb --path_to_cg reference_parameters/coarse_grained_structures/1beb.vtf"
python3 tests/run_test_protein.py --pdb_code 1f6s --run_command "${ESPResSo_build_path}/pypresso sample_scripts/globular_protein.py --pdb 1f6s --metal_ion_name Ca --metal_ion_charge 2 --path_to_cg reference_parameters/coarse_grained_structures/1f6s.vtf"

tests:
make tests_peptide
make tests_globular_protein
${ESPResSo_build_path}/ipypresso notebook sugar_tutorial.ipynb
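For reference, the consolidated target can be invoked from the repository root with the build path set on the command line, e.g. `make testsuite ESPResSo_build_path=~/software/espresso/build` (path illustrative).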
73 changes: 39 additions & 34 deletions lib/analysis.py
@@ -24,6 +24,39 @@ def add_data_to_df(df, data_dict, index):
index=index)])
return updated_df

def analyze_time_series(path_to_datafolder):
"""
Analyzes all time series stored in `path_to_datafolder` using the block binning method.
Args:
path_to_datafolder(`str`): path to the folder containing the time-series files
Returns:
(`obj`): pandas DataFrame with the block-analysis statistics (mean, err_mean, n_eff, tau_int) of each time series, labeled by the parameters parsed from the file names
"""
data=pd.DataFrame()
with os.scandir(path_to_datafolder) as subdirectory:
# Gather all data
for subitem in subdirectory:
if subitem.is_file():
if 'time_series' in subitem.name:
# Get parameters from the file name
data_dict=get_params_from_dir_name(subitem.name.replace('_time_series.csv', ''))
# Get the observables for binning analysis
time_series_data=read_csv_file(path=f"{path_to_datafolder}/{subitem.name}")
analyzed_data=block_analyze(full_data=time_series_data)
value_list=[]
index_list=[]
for key in data_dict.keys():
value_list.append(data_dict[key])
index_list.append((key,))
analyzed_data = pd.concat([pd.Series(value_list, index=index_list), analyzed_data])
data = add_data_to_df(df=data,
data_dict=analyzed_data.to_dict(),
index=[len(data)])
return data
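
A minimal usage sketch of this new helper (the import path and folder layout are assumptions based on this repository's structure):

```python
# Sketch: analyze every "*_time_series.csv" file in a folder and collect
# one row per file, combining the parameters parsed from the file name
# with the block-analysis statistics. Paths are illustrative.
from lib import analysis

results = analysis.analyze_time_series(path_to_datafolder="samples/Beyer2024/time_series")
print(results.head())
```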

def do_binning_analysis(list_time_series_df, frac_data_to_discard=0.3):
"""
Does a binning analysis of all Pandas DataFrame objects in `list_time_series_df`.
@@ -77,9 +110,6 @@ def merge_time_series_dfs(list_time_series_df,frac_data_to_discard=0,rescale_tim
binning_df["time"]+=index*binning_df.shape[0]*dt
list_time_series_df[index]=binning_df

# Join all the dataframes for binning analysis
gathered_binning_df=pd.concat(list_time_series_df)

@@ -154,8 +184,6 @@ def split_dataframe(df,n_blocks):

# Second-stage blocks (s2): block_size_s2 = df.shape[0]//n_blocks
n_blocks_s2= n_blocks - n_blocks_s1
block_size_s2=df.shape[0]//n_blocks
blocks+=split_dataframe_in_equal_blocks(df=df,
start_row=n_blocks_s1*block_size_s1,
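
A worked example of the two-stage split (a sketch; the first-stage block size is assumed to be one row larger than the second-stage one, consistent with the remainder arithmetic visible above):

```python
# Illustrative: splitting 1000 rows into 16 blocks.
n_rows, n_blocks = 1000, 16
block_size_s2 = n_rows // n_blocks        # 62 rows per second-stage block
n_blocks_s1 = n_rows % n_blocks           # 8 first-stage blocks absorb the remainder
n_blocks_s2 = n_blocks - n_blocks_s1      # 8 second-stage blocks
block_size_s1 = block_size_s2 + 1         # 63 rows (assumed from the remainder logic)
assert n_blocks_s1*block_size_s1 + n_blocks_s2*block_size_s2 == n_rows
```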
@@ -173,40 +201,30 @@ def get_time_series_from_average_df(pd_series, label):
Returns:
(`obj`): pandas DataFrame with the time series and their statistical error
Authors:
- Pablo M. Blanco, Norwegian University of Science and Technology
"""
import numpy as np
dist_dict={}
expected_labels=["mean", "errmean", "nsamples"]

for data_str in pd_series[label]:
clean_data_str=data_str.strip("{").strip("}")
for data_set in clean_data_str.split("]"):
data_list=data_set.split(":")
if len(data_list) == 1:
continue
# Parse data and label
data=list(data_list[1][2:].split(","))
data=np.array([float(x) for x in data])

clean_label=data_list[0].strip("'").strip(",").strip()
label_sts=clean_label.split("_")[-1]
label_qty=clean_label[:-len(label_sts)-1].strip("'")
if label_sts not in expected_labels:
raise ValueError(f"Error while parsing the df, found label for stats {label_sts}")

if label_qty in dist_dict.keys():
dist_dict[label_qty][label_sts]=data
else:
dist_dict[label_qty]={}
dist_dict[label_qty][label_sts]=data
return pd.DataFrame.from_dict(dist_dict)
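
For reference, a hypothetical serialized entry of the kind this parser expects (the quantity name `charge` and all values are illustrative): keys are quantity names suffixed with one of the expected stats labels, and values are lists of numbers:

```python
# Parsing a string like this with the loop above yields a DataFrame with one
# column per quantity ("charge") and the arrays for "mean", "errmean" and
# "nsamples" collected under it.
raw = "{'charge_mean': [0.10,0.12], 'charge_errmean': [0.01,0.01], 'charge_nsamples': [500,500]}"
```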


@@ -224,13 +242,6 @@ def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1, columns
Returns:
`result`: pandas dataframe with the mean (mean), statistical error (err_mean), number of effective samples (n_eff) and correlation time (tau_int) of each observable.
Note:
This function should not be modified without consulting first its authors.
Authors:
- Peter Kosovan, Charles University
- Pablo M. Blanco, Norwegian University of Science and Technology
"""

dt = get_dt(full_data) # check that the data was stored with the same time interval dt
@@ -255,8 +266,6 @@
print(f"n_blocks b = {n_blocks}, block_size k = {block_size}")

# calculate the mean for each block
series_data = pd.Series(data.iat[0, 0], index=data.columns)
blocks = split_dataframe(df=data,
n_blocks=n_blocks)

@@ -271,9 +280,10 @@
err_mean = np.sqrt(var_blocks/n_blocks) # standard error of the mean by eq.(37) of Janke
tau_int = dt*block_size * var_blocks / var_all / 2. # eq.(38) of Janke
n_eff = n_samples / (2*tau_int/dt) # effective number of samples in the whole simulation using eq.(28) of Janke

# concatenate the observables and attribute a key to each (unique index)
result = pd.concat( [ mean, err_mean, n_eff, tau_int], keys= [ "mean", "err_mean", "n_eff", "tau_int" ])
result = pd.concat( [ pd.Series({"n_blocks":n_blocks,"block_size":block_size}), result] )
result = pd.concat( [ mean, err_mean, n_eff, tau_int], keys= [ "mean", "err_mean", "n_eff", "tau_int" ], join="inner")
result = pd.concat( [ pd.Series([n_blocks,block_size], index=[('n_blocks',),('block_size',)]), result])
return result
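
As a rough sanity check of these estimators (a sketch; the import path is an assumption based on this repository's layout): for uncorrelated samples the blocked error of the mean should approach 1/sqrt(n_samples) and tau_int should approach dt/2, since there is no correlation.

```python
# Feed synthetic white-noise data through block_analyze and compare the
# reported statistics against the analytical expectations for
# uncorrelated samples. Values are illustrative.
import numpy as np
import pandas as pd
from lib.analysis import block_analyze

rng = np.random.default_rng(42)
df = pd.DataFrame({"time": np.arange(10_000, dtype=float),  # uniform dt = 1
                   "obs": rng.normal(size=10_000)})
stats = block_analyze(full_data=df, n_blocks=16)
print(stats)  # expect err_mean ~ 0.01 and tau_int ~ 0.5 for "obs"
```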

def get_dt(data):
Expand Down Expand Up @@ -373,7 +383,6 @@ def create_histogram_df_from_distribution_list(distribution_list, start, end, nb
cnt+=5000
return pd.DataFrame.from_dict(dict_hist)


def find_index_with_value_in_df(df,column_name, value, tol=0.01):
"""
Finds the index of the row in the pandas DataFrame `df` whose value in column `column_name` matches `value` within a relative tolerance.
@@ -386,7 +395,6 @@ def find_index_with_value_in_df(df,column_name, value, tol=0.01):
Returns:
index (int): Index found.
"""
index = np.where(abs(df[column_name]-value)/value < tol)
return index[0]
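
For example (DataFrame and column names illustrative); note that the tolerance is relative, so `value` must be nonzero:

```python
# Positional indices where the "pH" column is within 1% of 7.0.
idx = find_index_with_value_in_df(df=data, column_name="pH", value=7.0)
```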
@@ -403,9 +411,6 @@ def built_output_name(input_dict):
Note:
The standard formatting rule is parametername1-parametervalue1_parametername2-parametervalue2
Authors:
- Pablo M. Blanco, Norwegian University of Science and Technology (NTNU)
"""
output_name=""
for label in input_dict:
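Under this rule, a hypothetical call would behave as follows (the exact handling of the trailing separator is not visible in this excerpt):

```python
built_output_name(input_dict={"pH": 7.0, "sequence": "KKKDDD"})
# expected: "pH-7.0_sequence-KKKDDD" under the stated formatting rule
```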
28 changes: 3 additions & 25 deletions samples/Beyer2024/create_paper_data.py
@@ -40,7 +40,6 @@
raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}")

## Peptide plots (Fig. 6)
labels_fig6=["6a", "6b", "6c"]

if fig_label in labels_fig6:
@@ -54,40 +53,19 @@
else:
raise RuntimeError()
pH_range = np.linspace(2, 12, num=21)

for pH in pH_range:
run_command=f"python3 {script_path} --sequence {sequence} --pH {pH} --mode {mode}"
print(run_command)
os.system(run_command)

# Read all files in the subdir
data_files=[]
# Analyze all time series
time_series_folder_path=pmb.get_resource("samples/Beyer2024/time_series")
data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path)

data=pd.DataFrame()

with os.scandir(time_series_folder_path) as subdirectory:
# Gather all data
for subitem in subdirectory:
if subitem.is_file():
if 'time_series' in subitem.name:
# Get parameters from the file name
data_dict=analysis.get_params_from_dir_name(subitem.name.replace('_time_series.csv', ''))
file_data=pd.DataFrame(data_dict, index=[0])
# Get the observables for binning analysis
time_series_data=analysis.read_csv_file(path=f"{time_series_folder_path}/{subitem.name}")
analyzed_data=analysis.block_analyze(full_data=time_series_data)
data_dict.update(analyzed_data.to_dict())
data = analysis.add_data_to_df(df=data,
data_dict=data_dict,
index=[len(data)])
for param in data_dict.keys():
analyzed_data[param]=data_dict[param]

# Store mean values and other statistics
data_path=pmb.get_resource("samples/Beyer2024/")+"data"
if not os.path.exists(data_path):
os.makedirs(data_path)

data.to_csv(f"{data_path}/fig{fig_label}.csv")

# Plot the data
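For orientation, each pass of the pH loop above shells out a command of the form `python3 samples/Beyer2024/peptide.py --sequence <sequence> --pH <pH> --mode <mode>`, and `analysis.analyze_time_series` then reduces every stored time series to a single row of statistics before the results are written to `fig{fig_label}.csv` under `samples/Beyer2024/data`.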
9 changes: 6 additions & 3 deletions samples/Beyer2024/peptide.py
@@ -19,6 +19,7 @@
# Create an instance of pyMBE library
pmb = pyMBE.pymbe_library()

valid_modes=["short-run","long-run", "test"]
parser = argparse.ArgumentParser(description='Script to run the peptide test cases for pyMBE')
parser.add_argument('--sequence',
type=str,
@@ -35,7 +36,7 @@
parser.add_argument('--mode',
type=str,
default= "short-run",
help='sets for how long the simulation runs, valid modes are "short-run" and "long-run"')
help=f'sets for how long the simulation runs, valid modes are {valid_modes}')
args = parser.parse_args()

# Inputs
@@ -46,7 +47,6 @@

mode=args.mode

valid_modes=["short-run","long-run"]
if mode not in valid_modes:
raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}")

@@ -55,9 +55,12 @@
Nsamples = 1000
MD_steps_per_sample = 1000

if mode == "long-run":
elif mode == "long-run":
Nsamples = 5000
MD_steps_per_sample = 5000
elif mode == "test":
Nsamples = 500
MD_steps_per_sample = 700

SEED = 100
dt = 0.01
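The new test mode can also be exercised on its own, e.g. `python3 samples/Beyer2024/peptide.py --sequence KKKDDD --pH 7 --mode test` (sequence and pH illustrative), which runs the shorter trajectory of 500 samples with 700 MD steps per sample.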