diff --git a/.github/workflows/testsuite.yml b/.github/workflows/testsuite.yml index 0eef7f8..d6abf51 100644 --- a/.github/workflows/testsuite.yml +++ b/.github/workflows/testsuite.yml @@ -29,6 +29,6 @@ jobs: run: | module load ESPResSo/4.2.1-foss-2023a source pymbe/bin/activate - make testsuite + make tests deactivate shell: bash diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c417682..7a75235 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,7 +10,7 @@ To contribute to the pyMBE library, first one needs to be added as a member of t If you want to contribute to the development of the pyMBE library, please contact Dr. Pablo M. Blanco (pablb@ntnu.no) ## Rules to contribute to pyMBE -Any new version of the code must reproduce all the data stored in `reference_data/`. -The scripts provided in `tests/` can be used to quickly set-up simulations with pyMBE to reproduce such data sets. +Any new version of the code must reproduce all the data stored in `testsuite/data`. +The scripts provided in `testsuite/` and in `samples/Beyer2024` can be used to quickly set-up simulations with pyMBE to reproduce such data sets. All new code will be reviewed by at least one member of the pyMBE development team before being merged into the stable version of the library to ensure that a functional version of the code is always available. Class methods are sorted in alphabetical order. diff --git a/Makefile b/Makefile index a33ea06..8cec49e 100644 --- a/Makefile +++ b/Makefile @@ -2,15 +2,13 @@ .PHONY: sample .PHONY: visual .PHONY: clean -.PHONY: tests -.PHONY: testsuite .PHONY: docs docs: pdoc ./pyMBE.py -o ./docs --docformat google -testsuite: - python3 testsuite/LYS_ASP_peptide.py +tests: + python3 testsuite/peptide_tests.py sample: python3 sample_scripts/peptide_simulation_example.py @@ -19,15 +17,5 @@ visual: python3 handy_scripts/vmd-traj.py vmd -e visualization.tcl -tests_peptide: - python3 tests/LYS_ASP_peptide.py - python3 tests/GLU_HIS_peptide.py - python3 tests/histatin5_peptide.py - -tests_globular_protein: - python3 tests/run_test_protein.py --pdb_code 1beb --run_command "python3 sample_scripts/globular_protein.py --pdb 1beb --path_to_cg reference_parameters/coarse_grained_structures/1beb.vtf" - python3 tests/run_test_protein.py --pdb_code 1f6s --run_command "python3 sample_scripts/globular_protein.py --pdb 1f6s --metal_ion_name Ca --metal_ion_charge 2 --path_to_cg reference_parameters/coarse_grained_structures/1f6s.vtf" - -tests: - make tests_peptide - make tests_globular_protein +tutorial: + jupyter-lab tutorials/pyMBE_tutorial.ipynb diff --git a/README.md b/README.md index 395b0bd..3a411e1 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,11 @@ A deprecated version of pyMBE compatible with ESPResSo v4.1.4 (under the histori - `docs/`: folder with the API documentation of pyMBE. - `figs/`: folder with various images used in the tutorials of pyMBE. -- `handy_scripts/`: folder with various handy scripts and libraries. +- `libs/`: folder with various libraries. - `logo/`: folder with the logo of pyMBE. -- `mantainer/`: folder with various scripts used by the mantainers. -- `reference_data/`: folder with various reference data set used to validate pyMBE. -- `reference_parameters/`: folder with various sets of parameters from previous works. -- `sample_scripts/`: folder with various sample scripts showcasing how to use pyMBE to setup different systems. -- `tests/`: folder with several test scripts to check that new developments do not break pyMBE. +- `maintainer/`: folder with various scripts used by the maintainers. +- `parameters/`: folder with various sets of parameters from previous works. +- `samples/`: folder with various sample scripts showcasing how to use pyMBE to setup different systems. - `testsuite/`: folder with several test scripts and data for continous integration of the library. - `tutorials/`: folder with the available tutorials on pyMBE. - `visualization/`: folder with helper scripts to aid the visualization of vtf trajectories from constant pH and Grand reaction simulations with [VMD](https://www.ks.uiuc.edu/Research/vmd/). @@ -34,6 +32,7 @@ A deprecated version of pyMBE compatible with ESPResSo v4.1.4 (under the histori - `CONTRIBUTING`: rules on how to contribute to pyMBE. - `LICENSE.md`: license of pyMBE. - `pyMBE.py`: source code of pyMBE +- `requirements.txt`: list of required libraries to use pyMBE. ## Usage diff --git a/handy_scripts/data_analysis.py b/handy_scripts/data_analysis.py deleted file mode 100644 index 7f5b9ad..0000000 --- a/handy_scripts/data_analysis.py +++ /dev/null @@ -1,177 +0,0 @@ -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -import os -from pathlib import Path -import argparse -import sys - -parser = argparse.ArgumentParser(description='Data analysis') -parser.add_argument('folder', type=str, help='Folder to read the files', default=sys.path[0]) -parser.add_argument('--plot',help='Folder to read the files',default=False,action='store_true') -parser.add_argument('--columns_to_plot', type=str, help='Folder to read the files', default=None) - -args = parser.parse_args () - -folder = args.folder -plot = args.plot -columns_to_plot = args.columns_to_plot - - -def plot_time_trace(full_data,filename, columns="all", mean_values = None, save_pdf = False): - if(columns == "all"): - columns = full_data.columns.to_list() - for x in ['time', 'step', 'sweep']: - if(x in columns): - columns.remove(x) - if(save_pdf): - directory = "analysis_outputs" - if(os.path.isdir(directory)==False): - os.mkdir(directory) - n_cols = len(columns) - if 'time' in full_data.columns.to_list(): - x_data = full_data['time'] - elif 'MC sweep' in full_data.columns.to_list(): - x_data = full_data['MC sweep'] - else: - raise ValueError("columns: "+str( full_data.columns.to_list() ) ) - for col in columns: - fig = plt.figure(figsize = (20,7)) - plt.plot( - x_data, - full_data[col], - linewidth = 1, - marker = 'o', - markersize = 3, - alpha = 0.8, - color = 'blue', - label = col - ) - plt.xlabel('time [internal units]') - plt.ylabel(col) - plt.title(filename) - if(save_pdf): - pdf_name = filename.name.replace("observables.dat", "_"+col+".pdf").replace("csv","pdf") - plt.savefig(os.path.join(directory,pdf_name)) - plt.show() - -def get_params_from_file_name(file_name): - system_name = file_name.parts[-1].replace('_observables.dat', '') - #print(system_name) - entries = system_name.split('_') - #print(entries) - params = {} - for entry in entries: - sp_entry = entry.split('-') - if(sp_entry[0]=='N'): - params[sp_entry[0]] = int(sp_entry[-1]) - else: - params[sp_entry[0]] = sp_entry[-1] - return params - -def rename_columns(data_frame, suffix, skip = ['time','step', "MC sweep"]): - new_index = [] - for index in data_frame.index: - if(index in skip): - new_index.append(index) - else: - new_index.append(index+suffix) - data_frame.index = new_index - return data_frame - -def get_dt(data): - if 'time' in data: - time = data['time'] - elif 'MC sweep' in data: - time = data['MC sweep'] - else: - raise ValueError("neither 'time' nor 'MC sweep' column found in data, got " + str(data)) - imax = data.shape[0] - dt_init = time[1] - time[0] - warn_lines = []; - for i in range(1,imax): - dt = time[i] - time[i-1] - if(np.abs((dt_init - dt)/dt) > 0.01 ): - warn_lines.append(f"Row {i} dt = {dt} = {time[i]} - {time[i-1]} not equal to dt_init = {dt_init}") - if(len(warn_lines) > 20): - print("\n") - for line in warn_lines: - print(line) - return dt - -def block_analyze(full_data,filename, n_blocks=16, equil=0.1): - params = get_params_from_file_name(filename) - dt = get_dt(full_data) # check that the data was stored with the same time interval dt - drop_rows = int(full_data.shape[0]*equil) # calculate how many rows should be dropped as equilibration - # drop the rows that will be discarded as equlibration - data = full_data.drop(range(0,drop_rows)) - # drop the columns step, time and MC sweep - for column_name in ['time', 'step', 'MC sweep']: - if(column_name in data.columns): - data = data.drop(columns=column_name) - # first, do the required operations on the remaining data - n_samples = data.shape[0] # number of samples to be analyzed - block_size = n_samples/n_blocks # mean block size - mean = data.mean() # calculate the mean values of each column - var_all = data.var() # calculate the variance of each column - params['Blocks'] = n_blocks - params['B_size'] = block_size - print("b:", n_blocks, "k:", block_size) - - # calculate the mean per each block - blocks = np.array_split(data,n_blocks) # split the data array into blocks of (almost) equal size - block_means = [] # empty list that we will use to store the means per each block - for block in blocks: - block_mean = block.mean() # mean values of each column within a given block - block_means.append(block_mean) - block_means = pd.concat(block_means, axis=1).transpose() - - # perform calculations using averages or individual data blocks - var_mean = block_means.var() # variance of the block averages = variance of the mean - err_mean = np.sqrt(var_mean) # standard error of the mean - n_eff = n_blocks*var_all/var_mean # effective number of samples in the whole simulation using eq.(28) and eq.(38) from Janke - tau_int = dt*n_samples/(2*n_eff) # autocorrelation time using eq.(28) from Janke - - # modify the column names of the temporary results - err_mean = rename_columns(err_mean,"Err") - n_eff = rename_columns(n_eff,"Nef") - tau_int = rename_columns(tau_int,"Tau") - # first, concatenate the observables and alphabetically sort them by column names - result = pd.concat( [ mean, err_mean, n_eff, tau_int ] ).sort_index(axis=0) - # next, concatenate the results with simulation parameters to produce a human-readable output - result = pd.concat( [ pd.Series(params), result] ) - return result - -######################## -# Key parameters - -n_blocks = [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # uncomment this line if you want to see how n_blocks affects the result -n_blocks = [16] # default number of blocks in the block analysis -equil = 0.1 # fraction of of all data that should be discarded as equilibration - -filenames = list(Path(folder).glob('*.csv')) # file names to be analyzed - -columns_to_plot = [columns_to_plot] - -results = [] # empty list that we will use to store the results -print("filenames: ", filenames) - -for n_b in n_blocks: - for filename in filenames: - print("filename: ", filename) - full_data = pd.read_csv(filename) # read the full trajectory as pandas dataframe - - if(plot): - plot_time_trace(full_data,filename, columns=columns_to_plot, save_pdf = True) - tmp_result = block_analyze( - full_data = full_data, - filename = filename, - n_blocks = n_b, - equil = equil, - ) - results.append(tmp_result) - -results = pd.concat(results, axis=1).transpose() # re-shape the matrix with data to prepare it for writing to the csv file -print("final_results:\n", results) -results.to_csv(open("analyzed_observables.csv","w")) # save the results csv file that can be imported to spreadsheet calculators -print("###\nFinished\n###") \ No newline at end of file diff --git a/lib/analysis.py b/lib/analysis.py new file mode 100644 index 0000000..1b69a25 --- /dev/null +++ b/lib/analysis.py @@ -0,0 +1,424 @@ +import os +import pandas as pd +import numpy as np + +def add_data_to_df(df, data_dict, index): + """ + Adds the data in data_dict in the df pandas dataframe + + Args: + df(`obj`): pandas dataframe + data_dict(`dict`): dictionary with the data to be added + index(`lst`): index of the df where the data should be added + Returns: + updated_df(`obj`): pandas dataframe updated with data_dict + + """ + if df.empty: + # if the dataframe is empty, load the first set of data + updated_df = pd.DataFrame(data_dict, + index=index) + else: + # if the dataframe has data, concatenate the new data + updated_df = pd.concat([df, pd.DataFrame(data_dict, + index=index)]) + return updated_df + +def analyze_time_series(path_to_datafolder): + """ + Analyzes all time series stored in `path_to_datafolder` using the block binning method. + + Args: + path_to_datafolder(`str`): path to the folder with the files with the time series + + Returns: + (`obj`): pandas dataframe with + + """ + data=pd.DataFrame() + with os.scandir(path_to_datafolder) as subdirectory: + # Gather all data + for subitem in subdirectory: + if subitem.is_file(): + if 'time_series' in subitem.name: + # Get parameters from the file name + data_dict=get_params_from_dir_name(subitem.name.replace('_time_series.csv', '')) + # Get the observables for binning analysis + time_series_data=read_csv_file(path=f"{path_to_datafolder}/{subitem.name}") + analyzed_data=block_analyze(full_data=time_series_data) + value_list=[] + index_list=[] + for key in data_dict.keys(): + value_list.append(data_dict[key]) + index_list.append((key,"value")) + analyzed_data = pd.concat([pd.Series(value_list, index=index_list), analyzed_data]) + data = add_data_to_df(df=data, + data_dict=analyzed_data.to_dict(), + index=[len(data)]) + return data + +def do_binning_analysis(list_time_series_df, frac_data_to_discard=0.3): + """ + Does a binning analysis of all Pandas DataFrame objects in `list_time_series_df`. + + Args: + list_time_series_df(`lst`): List of Pandas DataFrame objects + frac_data_to_discard(`float`, optional): Fraction of data to be discarted in each Pandas DataFrame. Defaults to 0.3. + + Returns: + mean_series(`obj`): PandasSeries object with the mean, error of the mean, number of effective samples and correlation time. + + Note: + - All Pandas DataFrame objects in `list_time_series_df` are merged into a single Pandas DataFrame object for the binning analysis. + - All Pandas DataFrame objects in `list_time_series_df` are assumed to have the same observables. + - The fraction of data to be discarted as equilibration is discarted in each of the Pandas DataFrame objects. + """ + # Estimate the time interval in which the data was stored + gathered_binning_df=merge_time_series_dfs(list_time_series_df=list_time_series_df, + frac_data_to_discard=frac_data_to_discard) + # Clean up the dataframe + if "Unnamed: 0" in gathered_binning_df.columns: + gathered_binning_df=gathered_binning_df.drop(['Unnamed: 0'], axis=1) + # Do the binning analysis + mean_series=block_analyze(gathered_binning_df) + return mean_series + +def merge_time_series_dfs(list_time_series_df,frac_data_to_discard=0,rescale_time=False): + """ + Merges all Pandas DataFrame objects in `list_time_series_df` and rescales the time. + + Args: + list_time_series_df(`lst`): List of Pandas DataFrame objects + frac_data_to_discard(`float`, optional): Fraction of data to be discarted in each Pandas DataFrame. Defaults to 0. + + Returns: + gathered_binning_df(`obj`): DataFrame object with all data. + + Note: + - All Pandas DataFrame objects in `list_time_series_df` are assumed to have the same observables. + - The fraction of data to be discarted as equilibration is discarted in each of the Pandas DataFrame objects. + """ + # Estimate the time interval in which the data was stored + dt = get_dt(list_time_series_df[0]) + for index in range(len(list_time_series_df)): + binning_df=list_time_series_df[index] + # Drop the data corresponding to equilibration + drop_rows = int(binning_df.shape[0]*frac_data_to_discard) + binning_df = binning_df.drop(range(0,drop_rows)) + if rescale_time: + # Make a continous time evolution + binning_df["time"]+=index*binning_df.shape[0]*dt + list_time_series_df[index]=binning_df + + # Join all the dataframes for binning analysis + gathered_binning_df=pd.concat(list_time_series_df) + + if 'Unnamed: 0' in gathered_binning_df.columns: + # Clean up the dataframe + gathered_binning_df=gathered_binning_df.drop(['Unnamed: 0'], axis=1) + + # Clean up the dataframe + gathered_binning_df.reset_index(drop=True, inplace=True) + return gathered_binning_df + + +def get_params_from_dir_name(name): + """ + Gets the parameters from name assuming a structure + name=obsname1-value1_obsname2-value2... + + Args: + name(`str`): name of the directory + + Returns: + params(`dict`): dictionary with the labels and values of the parameters + """ + entries = name.split('_') + params = {} + for entry in entries: + sp_entry = entry.split('-') + params[sp_entry[0]] = sp_entry[-1] #float(sp_entry[-1]) # creates a dictionary of parameters and their values. + return params + + +def split_dataframe_in_equal_blocks(df, start_row, end_row, block_size): + """ + Splits a Pandas dataframe in equally spaced blocks. + + Args: + - df (`obj`): PandasDataframe + - start_row (`int`): index of the first row + - end_row (`int`): index of the last row + - block_size (`int`): number of rows per block + + Returns: + - (`list`): array of PandasDataframe of equal size + """ + return [df[row:row+block_size] for row in range(start_row,end_row,block_size)] + +def split_dataframe(df,n_blocks): + """ + Splits a Pandas Dataframe in n_blocks of approximately the same size. + + Args: + - df (`obj`): PandasDataframe + - n_blocks (`int`): Number of blocks + + Returns: + - (`list`): array of PandasDataframe + + Notes: + - For a `df` of length `l` that should be split into n_blocks, it returns l % n_blocks sub-arrays of size l//n_blocks + 1 and the rest of size l//n_blocks. + - The behaviour of this function is the same as numpy.array_split for backwards compatibility, see [docs](https://numpy.org/doc/stable/reference/generated/numpy.array_split.html) + + """ + + # Blocks of size 1 (s1) = df.shape[0]//n_blocks+1 + + n_blocks_s1=df.shape[0] % n_blocks + block_size_s1=df.shape[0]//n_blocks+1 + blocks=split_dataframe_in_equal_blocks(df=df, + start_row=0, + end_row=n_blocks_s1*block_size_s1, + block_size=block_size_s1) + + + # Blocks of size 2 (s2) = df.shape[0]//n_blocks + block_size_s2=df.shape[0]//n_blocks + blocks+=split_dataframe_in_equal_blocks(df=df, + start_row=n_blocks_s1*block_size_s1, + end_row=df.shape[0], + block_size=block_size_s2) + return blocks + +def get_time_series_from_average_df(pd_series, label): + """ + Gets the time series from a PandasSeries object + + Args: + pd_series (`obj`): PandasSeries object + label (`str`): column name where the time series are stored + + Returns: + (`obj`): PandasDataFrame with the time series and their statistical error + """ + import numpy as np + dist_dict={} + expected_labels=["mean", "errmean", "nsamples"] + for data_str in pd_series[label]: + clean_data_str=data_str.strip("{").strip("}") + for data_set in clean_data_str.split("]"): + data_list=data_set.split(":") + if len(data_list) == 1: + continue + # Parse data and label + data=list(data_list[1][2:].split(",")) + data=np.array([float(x) for x in data]) + clean_label=data_list[0].strip("'").strip(",").strip() + label_sts=clean_label.split("_")[-1] + label_qty=clean_label[:-len(label_sts)-1].strip("'") + if label_sts not in expected_labels: + raise ValueError(f"Error while parsing the df, found label for stats {label_sts}") + + if label_qty in dist_dict.keys(): + dist_dict[label_qty][label_sts]=data + else: + dist_dict[label_qty]={} + dist_dict[label_qty][label_sts]=data + return pd.DataFrame.from_dict(dist_dict) + + +def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1, columns_to_analyze = "all", verbose = False): + """ + Analyzes the data in `full_data` using a binning procedure. + + Args: + - full_data(`obj`): pandas dataframe with the observables time series + - n_blocks(`int`): number of blocks used in the binning procedure. + - time_col(`str`): column name where the time is stored in `full_data`. Defaults to `time`. + - equil(`float`,opt): fraction of the data discarded as part of the equilibration. Defaults to 0.1. + - columns_to_analyze(`list`): array of column names to be analyzed. Defaults to "all". + - verbose(`bool`): switch to activate/deactivate printing the block size. Defaults to False. + + Returns: + `result`: pandas dataframe with the mean (mean), statistical error (err_mean), number of effective samples (n_eff) and correlation time (tau_int) of each observable. + """ + + dt = get_dt(full_data) # check that the data was stored with the same time interval dt + drop_rows = int(full_data.shape[0]*equil) # calculate how many rows should be dropped as equilibration + # drop the rows that will be discarded as equlibration + data = full_data.drop(range(0,drop_rows)) + # drop the columns step, time and MC sweep + if time_col in data.columns : + data = data.drop(columns = time_col) + else: + raise ValueError(f"could not find the time column {time_col} in the data") + if columns_to_analyze != "all": + for column_name in data.columns: + if column_name not in columns_to_analyze: + data = data.drop(columns=column_name) + # first, do the required operations on the remaining data + n_samples = data.shape[0] # number of samples to be analyzed + block_size = n_samples/n_blocks # mean block size + mean = data.mean() # calculate the mean values of each column + var_all = data.var() # calculate the variance of each column + if verbose: + print(f"n_blocks b = {n_blocks}, block_size k = {block_size}") + + # calculate the mean per each block + blocks = split_dataframe(df=data, + n_blocks=n_blocks) + + block_means = [] # empty list that we will use to store the means per each block + for block in blocks: + block_mean = block.mean() # mean values of each column within a given block + block_means.append(block_mean) + block_means = pd.concat(block_means, axis=1).transpose() + + # perform calculations using averages or individual data blocks + var_blocks = (n_blocks)/(n_blocks-1)*block_means.var() # variance of the block averages = variance of the mean + err_mean = np.sqrt(var_blocks/n_blocks) # standard error of the mean by eq.(37) of Janke + tau_int = dt*block_size * var_blocks / var_all /2.# eq.(38) of Janke + n_eff = n_samples / (2*tau_int/dt) # effective number of samples in the whole simulation using eq.(28) of Janke + + # concatenate the observables and atribute a key for each (unique index) + result = pd.concat( [ mean, err_mean, n_eff, tau_int], keys= [ "mean", "err_mean", "n_eff", "tau_int" ], join="inner") + result = pd.concat( [ pd.Series([n_blocks,block_size], index=[('n_blocks',),('block_size',)]), result]) + return result + +def get_dt(data): + """ + Sorts data to calculate the time step of the simulation. + + Args: + - data (`obj`): pandas dataframe with the observables time series + + Returns: + dt (`float`): simulation time step. + """ + if 'time' in data: + time = data['time'] + elif 'MC sweep' in data: + time = data['MC sweep'] + else: + raise ValueError("neither 'time' nor 'MC sweep' column found in data, got " + str(data)) + imax = data.shape[0] + dt_init = time[1] - time[0] + warn_lines = [] + for i in range(1,imax): + dt = time[i] - time[i-1] + if(np.abs((dt_init - dt)/dt) > 0.01 ): + warn_lines.append("Row {} dt = {} = {} - {} not equal to dt_init = {}") + if(len(warn_lines) > 20): + print("\n") + for line in warn_lines: + print(line) + return dt + +def read_csv_file(path): + """ + Reads the csv file in path. + + Args: + - path (`str`): path to the csv file + + Returns: + - `obj`: pandas dataframe with the information stored in the csv file + + """ + if os.path.exists(path): + return pd.read_csv(filepath_or_buffer=path) + else: + return None + +def get_distribution_from_df(df, key): + """ + Gets a list stored in a pandas dataframe `df`. + + Args: + - df (`obj`): pandas dataframe + - key (`str`): column name where the list is stored + + Returns: + distribution_list (`lst`): list stored under `key` + + """ + import pandas as pd + distribution_list=[] + for row in df[key]: + if pd.isnull(row): + continue + clean_row=row.strip("[").strip("]") + row_list=[] + for element in clean_row.split(): + row_list.append(float(element)) + distribution_list.append(row_list) + return distribution_list + +def create_histogram_df_from_distribution_list(distribution_list, start, end, nbins): + """ + Does a histogram from the data stored in `distribution_list` and stores it into a pandas dataframe. + + Args: + start (`float`): start point of the histogram + end (`float`): end point of the histogram + nbins (`int`): number of bins in the histogram + + Returns: + `obj`: Pandas dataframe with the histogram counts + """ + + counts, x_bins = np.histogram(distribution_list[0], bins=nbins, range=(start,end)) + dict_hist={} + for index in range(len(counts)): + dict_hist[x_bins[index]]=[] + dict_hist["time"]=[] + cnt=1 + for data_set in distribution_list: + counts, _ = np.histogram(data_set, bins=nbins, range=(start,end)) + norm=sum(counts) + for index in range(len(counts)): + dict_hist[x_bins[index]].append(counts[index]/norm) + dict_hist["time"].append(cnt) + cnt+=5000 + return pd.DataFrame.from_dict(dict_hist) + +def find_index_with_value_in_df(df,column_name, value, tol=0.01): + """ + Finds the index in the pandas DataFrame `df` with a column `column_name` and a row `value`. + + Args: + df (`obj`): Pandas DataFrame. + column_name (`str`): Label of the column. + value (`any`): Value to be found in `df[column_name]`. + tol (`float`): Tolerance in value. + + Returns: + index (int): Index found. + """ + index = np.where(abs(df[column_name]-value)/value < tol) + return index[0] + +def built_output_name(input_dict): + """ + Builts the output name for a given set of input parameters. + + Args: + input_dict (`dict`): dictionary with all terminal inputs. + + Returns: + output_name (`str`): name used for the output files + + Note: + The standard formatting rule is parametername1-parametervalue1_parametername2-parametervalue2 + """ + output_name="" + for label in input_dict: + if type(input_dict[label]) in [str,bool]: + formatted_variable=f"{input_dict[label]:}" + else: + formatted_variable=f"{input_dict[label]:.3g}" + output_name+=f"{label}-{formatted_variable}_" + return output_name[:-1] + + diff --git a/handy_scripts/create_cg_from_pdb.py b/lib/create_cg_from_pdb.py similarity index 100% rename from handy_scripts/create_cg_from_pdb.py rename to lib/create_cg_from_pdb.py diff --git a/handy_scripts/handy_functions.py b/lib/handy_functions.py similarity index 60% rename from handy_scripts/handy_functions.py rename to lib/handy_functions.py index a1e4db2..7f5f178 100644 --- a/handy_scripts/handy_functions.py +++ b/lib/handy_functions.py @@ -1,7 +1,7 @@ -def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, solvent_permittivity=78.5, method='p3m', tune_p3m=True, accuracy=1e-3): +def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, solvent_permittivity=78.5, method='p3m', tune_p3m=True, accuracy=1e-3,verbose=True): """ - Setups electrostatic interactions in espressomd. + Sets up electrostatic interactions in espressomd. Inputs: system: instance of espressmd system class c_salt: Added salt concentration. If provided, the program outputs the debye screening length. It is a mandatory parameter for the Debye-Huckel method. @@ -9,6 +9,7 @@ def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, s method: method prefered for computing the electrostatic interactions. Currently only P3M (label = p3m) and Debye-Huckel (label = DH) are implemented tune_p3m: If true (default), tunes the p3m parameters to improve efficiency accuracy: desired accuracy for electrostatics, by default 1e-3 + verbose (`bool`): switch to activate/deactivate verbose. Defaults to True. """ import espressomd.electrostatics import numpy as np @@ -29,8 +30,8 @@ def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, s N_A=6.02214076e23 / units.mol BJERRUM_LENGTH = e.to('reduced_charge')**2 / (4 * units.pi * units.eps0 * solvent_permittivity * kT.to('reduced_energy')) - - print('\n Bjerrum length ', BJERRUM_LENGTH.to('nm'), '=', BJERRUM_LENGTH.to('reduced_length')) + if verbose: + print(f"\n Bjerrum length {BJERRUM_LENGTH.to('nm')} = {BJERRUM_LENGTH.to('reduced_length')}") COULOMB_PREFACTOR=BJERRUM_LENGTH.to('reduced_length') * kT.to('reduced_energy') @@ -48,13 +49,16 @@ def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, s raise ValueError('Unknown units for c_salt, please provided it in [mol / volume] or [particle / volume]', c_salt) - - print('Debye kappa ', KAPPA.to('nm'), '=', KAPPA.to('reduced_length'), ) + if verbose: + print(f"Debye kappa {KAPPA.to('nm')} = {KAPPA.to('reduced_length')}") print() if method == 'p3m': - coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.magnitude, accuracy=accuracy) + coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR.magnitude, + accuracy=accuracy, + verbose=verbose, + tune=tune_p3m) if tune_p3m: espresso_system.time_step=0.01 @@ -82,7 +86,8 @@ def setup_electrostatic_interactions (units, espresso_system, kT, c_salt=None, s espresso_system.actors.add(coulomb) - print("\n Electrostatics successfully added to the system \n") + if verbose: + print("\n Electrostatics successfully added to the system \n") return @@ -97,12 +102,10 @@ def minimize_espresso_system_energy(espresso_system, skin=1, gamma=1, Nsteps=100 Nsteps: total number of steps of the minimization (Default=10000) time_step: Time step used for the energy minimization (Default=1e-2) max_displacement: maximum particle displacement allowed (Default=0.1 reduced length) + verbose (`bool`): switch to activate/deactivate verbose. Defaults to True. """ - if verbose: - - print("\n*** Minimazing system energy... ***\n") - + print("\n*** Minimizing system energy... ***\n") espresso_system.cell_system.skin = skin espresso_system.time_step=time_step if verbose: @@ -114,13 +117,11 @@ def minimize_espresso_system_energy(espresso_system, skin=1, gamma=1, Nsteps=100 espresso_system.integrator.set_vv() # to switch back to velocity Verlet espresso_system.integrator.run(int(Nsteps/2)) espresso_system.thermostat.turn_off() - # Reset the time of the system to 0 if reset: espresso_system.time = 0. if verbose: print("\n Minimization finished \n") - return def setup_langevin_dynamics(espresso_system, kT, SEED,time_step=1e-2, gamma=1, tune_skin=True, min_skin=1, max_skin=None, tolerance=1e-3, int_steps=200, adjust_max_skin=True): @@ -160,110 +161,6 @@ def create_random_seed(): print('\n The chosen seed for the random number generator is ', SEED) return SEED -def block_analyze(input_data, n_blocks=16): - ''' - Performs a binning analysis of input_data. - Divides the samples in ``n_blocks`` equispaced blocks - and returns the mean, its uncertainty, the correlation time - and the block size - ''' - # NOTE: Depracted function, check soft_matter_wiki - import numpy as np - data = np.asarray(input_data) - block = 0 - # this number of blocks is recommended by Janke as a reasonable compromise - # between the conflicting requirements on block size and number of blocks - block_size = int(data.shape[1] / n_blocks) - print(f"block_size: {block_size}") - # initialize the array of per-block averages - block_average = np.zeros((n_blocks, data.shape[0])) - # calculate averages per each block - for block in range(n_blocks): - block_average[block] = np.average(data[:, block * block_size: (block + 1) * block_size], axis=1) - # calculate the average and average of the square - av_data = np.average(data, axis=1) - av2_data = np.average(data * data, axis=1) - # calculate the variance of the block averages - block_var = np.var(block_average, axis=0) - # calculate standard error of the mean - err_data = np.sqrt(block_var / (n_blocks - 1)) - # estimate autocorrelation time using the formula given by Janke - # this assumes that the errors have been correctly estimated - tau_data = np.zeros(av_data.shape) - for val in range(av_data.shape[0]): - if av_data[val] == 0: - # unphysical value marks a failure to compute tau - tau_data[val] = -1.0 - else: - tau_data[val] = 0.5 * block_size * n_blocks / (n_blocks - 1) * block_var[val] \ - / (av2_data[val] - av_data[val] * av_data[val]) - - # check if the blocks contain enough data for reliable error estimates - print("uncorrelated samples per block:\nblock_size/tau = ", - block_size/tau_data) - threshold = 10. # block size should be much greater than the correlation time - if np.any(block_size / tau_data < threshold): - print("\nWarning: some blocks may contain less than ", threshold, "uncorrelated samples." - "\nYour error estimated may be unreliable." - "\nPlease, check them using a more sophisticated method or run a longer simulation.") - print("? block_size/tau > threshold ? :", block_size/tau_data > threshold) - else: - print("\nAll blocks seem to contain more than ", threshold, "uncorrelated samples.\ - Error estimates should be OK.") - - return av_data, err_data, tau_data, block_size - -def write_progress(step, total_steps, initial_simulation_time, units): - """ - Writes the progress of the loop and estimates the time for its completion - - Args: - step(int): current step in the production loop - total_steps(int): total number of steps in the production loop - initial_simulation_time(float): computer time when the simulation started, in seconds - units(obj): UnitRegistry object from the pint library - - Note: - It assumes that the simulation starts with step = 0 - """ - import time - - time_act=time.time()*units.s - perc_sim=100 *(step+1) / (total_steps) - time_per_step= (time_act - initial_simulation_time)/(step+1) - remaining_time=(total_steps - step +1)*time_per_step - elapsed_time=time_act-initial_simulation_time - - def find_right_time_units(time): - """ - Given a pint variable with time units, it returns in which time scale it is - """ - - if (time.to('s').magnitude/60 < 1): - - time_unit='s' - - elif (time.to('s').magnitude/3600 < 1): - - time_unit='min' - - elif (time.to('s').magnitude/(3600*24) < 1): - - time_unit='hour' - - else: - - time_unit='day' - - return time_unit - - time_unit_elapsed_time=find_right_time_units(elapsed_time) - time_unit_remaining_time=find_right_time_units(remaining_time) - - print("{0:.2g}% done, elapsed time {1:.2g}s; estimated completion in {2:.2g}s".format(perc_sim,elapsed_time.to(time_unit_elapsed_time),remaining_time.to(time_unit_remaining_time))) - - return - def visualize_espresso_system(espresso_system): """ Uses espresso visualizator for displaying the current state of the espresso_system diff --git a/maintainer/standarize_data.py b/maintainer/standarize_data.py new file mode 100644 index 0000000..f448abf --- /dev/null +++ b/maintainer/standarize_data.py @@ -0,0 +1,62 @@ +# Import pyMBE and other libraries +import pyMBE +import numpy as np +import pandas as pd +import argparse + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +# Expected inputs +supported_filenames=["Glu-HisMSDE.csv", + "Lys-AspMSDE.csv", + "histatin5_SoftMatter.txt"] + +parser = argparse.ArgumentParser(description='Script to standarize the data from various authors') +parser.add_argument('--src_filename', + type=str, + required= True, + help='File name from testsuite/data/src. Currently supported {supported_filenames}') +args = parser.parse_args() + +# Inputs +filename=args.src_filename + +# Outputs +output_filenames={"Lys-AspMSDE.csv": "Lunkad2021a.csv", + "Glu-HisMSDE.csv": "Lunkad2021b.csv", + "histatin5_SoftMatter.txt": "Blanco2020a.csv"} + +# Sanity checks +if filename not in supported_filenames: + ValueError(f"Filename {filename} not supported, supported files are {supported_filenames}") + +# Extact the data from Ref. +ref_path=pmb.get_resource(f"testsuite/data/src/{filename}") +Refs_lunkad=["Glu-HisMSDE.csv","Lys-AspMSDE.csv"] +Ref_blanco=["histatin5_SoftMatter.txt"] + +if filename in Refs_lunkad: + data=pd.read_csv(ref_path) + Z_ref = 5*-1*data['aaa']+5*data['aab'] + # Error propagation calculation + # 1/4 factor added to correct for bug in the original calculation of the error reported by the authors + Z_ref_err = 5/4*np.sqrt((data['eaa'])**2+(data['eab'])**2) + +elif filename in Ref_blanco: + data=np.loadtxt(ref_path, delimiter=",") + Z_ref=data[:,1] + Z_ref_err=data[:,2] +else: + raise RuntimeError() + +pH_range = np.linspace(2, 12, num=21) + +# Store the data +data=pd.DataFrame({"pH": pH_range, + "charge": Z_ref, + "charge_error": Z_ref_err}) + +data_path=pmb.get_resource(f"testsuite/data") +data.to_csv(f"{data_path}/{output_filenames[filename]}", + index=False) \ No newline at end of file diff --git a/reference_parameters/coarse_grained_structures/1beb.vtf b/parameters/globular_proteins/1beb.vtf similarity index 100% rename from reference_parameters/coarse_grained_structures/1beb.vtf rename to parameters/globular_proteins/1beb.vtf diff --git a/reference_parameters/coarse_grained_structures/1f6s.vtf b/parameters/globular_proteins/1f6s.vtf similarity index 100% rename from reference_parameters/coarse_grained_structures/1f6s.vtf rename to parameters/globular_proteins/1f6s.vtf diff --git a/reference_parameters/interaction_parameters/Avogadro_parametrization.txt b/parameters/peptides/Avogadro_parametrization.txt similarity index 100% rename from reference_parameters/interaction_parameters/Avogadro_parametrization.txt rename to parameters/peptides/Avogadro_parametrization.txt diff --git a/parameters/peptides/Blanco2020.txt b/parameters/peptides/Blanco2020.txt new file mode 100644 index 0000000..3d47869 --- /dev/null +++ b/parameters/peptides/Blanco2020.txt @@ -0,0 +1,35 @@ +# Parameters from Blanco et al. Soft Matter, 17(3), 655-669, 2021. +{"object_type":"particle", "name": "D", "acidity": "acidic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "E", "acidity": "acidic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "n", "acidity": "basic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "S", "q":0, "acidity": "inert", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "H", "acidity": "basic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "A", "q":0,"acidity": "inert", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "K", "acidity": "basic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "Y", "acidity": "acidic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "R", "acidity": "basic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "G", "q":0,"acidity": "inert", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "F", "q":0,"acidity": "inert", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"particle", "name": "c", "acidity": "acidic", "diameter": {"value":0.4, "units":"nm"}, "epsilon":{"value":1, "units":"reduced_energy"}} +{"object_type":"bond", "name1": "n", "name2": "D", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "S", "name2": "D", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "S", "name2": "H", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "A", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "A", "name2": "K", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "E", "name2": "H", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "E", "name2": "K", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "K", "name2": "R", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "K", "name2": "H", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "R", "name2": "H", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "H", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "G", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "G", "name2": "Y", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "Y", "name2": "K", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "R", "name2": "K", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "K", "name2": "F", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "S", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "F", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "H", "name2": "R", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "R", "name2": "G", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "Y", "name2": "G", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} +{"object_type":"bond", "name1": "Y", "name2": "c", "bond_type": "harmonic", "r_0": {"value":0.4, "units":"nm"}, "k": {"value": 0.41, "units":"N / m"}} diff --git a/reference_parameters/interaction_parameters/Lunkad2021.txt b/parameters/peptides/Lunkad2021.txt similarity index 100% rename from reference_parameters/interaction_parameters/Lunkad2021.txt rename to parameters/peptides/Lunkad2021.txt diff --git a/reference_parameters/pka_sets/Bienkiewicz1999.txt b/parameters/pka_sets/Bienkiewicz1999.txt similarity index 100% rename from reference_parameters/pka_sets/Bienkiewicz1999.txt rename to parameters/pka_sets/Bienkiewicz1999.txt diff --git a/reference_parameters/pka_sets/CRC1991.txt b/parameters/pka_sets/CRC1991.txt similarity index 100% rename from reference_parameters/pka_sets/CRC1991.txt rename to parameters/pka_sets/CRC1991.txt diff --git a/reference_parameters/pka_sets/Dobrev2020.txt b/parameters/pka_sets/Dobrev2020.txt similarity index 100% rename from reference_parameters/pka_sets/Dobrev2020.txt rename to parameters/pka_sets/Dobrev2020.txt diff --git a/reference_parameters/pka_sets/Hass2015.txt b/parameters/pka_sets/Hass2015.txt similarity index 100% rename from reference_parameters/pka_sets/Hass2015.txt rename to parameters/pka_sets/Hass2015.txt diff --git a/reference_parameters/pka_sets/Nozaki1967.txt b/parameters/pka_sets/Nozaki1967.txt similarity index 100% rename from reference_parameters/pka_sets/Nozaki1967.txt rename to parameters/pka_sets/Nozaki1967.txt diff --git a/reference_parameters/pka_sets/Platzer2014.txt b/parameters/pka_sets/Platzer2014.txt similarity index 100% rename from reference_parameters/pka_sets/Platzer2014.txt rename to parameters/pka_sets/Platzer2014.txt diff --git a/reference_parameters/pka_sets/Thurlkill2006.txt b/parameters/pka_sets/Thurlkill2006.txt similarity index 100% rename from reference_parameters/pka_sets/Thurlkill2006.txt rename to parameters/pka_sets/Thurlkill2006.txt diff --git a/pyMBE.py b/pyMBE.py index 330ada3..8e17db2 100644 --- a/pyMBE.py +++ b/pyMBE.py @@ -550,7 +550,7 @@ def copy_df_entry(self, name, column_name, number_of_copies): self.df = self.pd.concat ([self.df,df_by_name_repeated], ignore_index=True) return - def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt): + def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt, verbose=True): """ Creates a `c_salt` concentration of `cation_name` and `anion_name` ions into the `espresso_system`. @@ -558,7 +558,8 @@ def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt): espresso_system (`obj`): instance of an espresso system object. cation_name(`str`): `name` of a particle with a positive charge. anion_name(`str`): `name` of a particle with a negative charge. - c_salt (float): Salt concentration. + c_salt(`float`): Salt concentration. + verbose(`bool`): switch to activate/deactivate verbose. Defaults to True. Returns: c_salt_calculated (float): Calculated salt concentration added to `espresso_system`. @@ -583,10 +584,11 @@ def create_added_salt (self, espresso_system, cation_name, anion_name, c_salt): N_anion = N_ions*abs(cation_name_charge) self.create_particle(espresso_system=espresso_system, name=cation_name, number_of_particles=N_cation) self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=N_anion) - print('\n Added salt concentration of ', c_salt_calculated.to('mol/L'), 'given by ', N_cation, ' cations and ', N_anion, ' anions') + if verbose: + print(f"\n Added salt concentration of {c_salt_calculated.to('mol/L')} given by {N_cation} cations and {N_anion} anions") return c_salt_calculated - def create_counterions(self, object_name, cation_name, anion_name, espresso_system): + def create_counterions(self, object_name, cation_name, anion_name, espresso_system,verbose=True): """ Creates particles of `cation_name` and `anion_name` in `espresso_system` to counter the net charge of `pmb_object`. @@ -595,6 +597,7 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst espresso_system(`obj`): Instance of a system object from the espressomd library. cation_name(`str`): `name` of a particle with a positive charge. anion_name(`str`): `name` of a particle with a negative charge. + verbose(`bool`): switch to activate/deactivate verbose. Defaults to True. Returns: counterion_number (dict): {"name": number} @@ -627,9 +630,10 @@ def create_counterions(self, object_name, cation_name, anion_name, espresso_syst self.create_particle(espresso_system=espresso_system, name=anion_name, number_of_particles=counterion_number[anion_name]) else: counterion_number[anion_name] = 0 - print('The following counter-ions have been created: ') - for name in counterion_number.keys(): - print(f'Ion type: {name} created number: {counterion_number[name]}') + if verbose: + print('The following counter-ions have been created: ') + for name in counterion_number.keys(): + print(f'Ion type: {name} created number: {counterion_number[name]}') return counterion_number def create_molecule(self, name, number_of_molecules, espresso_system, first_residue_position=None, use_default_bond=False): @@ -2115,9 +2119,13 @@ def search_bond(self, particle_name1, particle_name2, hard_check=False, use_defa - If `use_default_bond`=True and no bond is defined between `particle_name1` and `particle_name2`, it returns the default bond defined in `pmb.df`. - If `hard_check`=`True` stops the code when no bond is found. """ + bond_key = self.find_bond_key(particle_name1=particle_name1, particle_name2=particle_name2, use_default_bond=use_default_bond) + if use_default_bond: + if not self.check_if_name_is_defined_in_df(name="default",pmb_type_to_be_defined='bond'): + raise ValueError(f"use_default_bond is set to {use_default_bond} but no default bond has been defined. Please define a default bond with pmb.define_default_bond") if bond_key: return self.df[self.df['name']==bond_key].bond_object.values[0] else: @@ -2640,7 +2648,7 @@ def setup_df (self): return columns_names - def setup_lj_interactions (self, espresso_system, cutoff=None, shift='auto', combining_rule='Lorentz-Berthelot'): + def setup_lj_interactions (self, espresso_system, cutoff=None, shift='auto', combining_rule='Lorentz-Berthelot', warnings=True): """ Sets up the Lennard-Jones (LJ) potential between all pairs of particle types with values for `diameter` and `epsilon` stored in `pymbe.df`. Stores the parameters loaded into ESPResSo for each type pair in `pymbe.df`. @@ -2652,6 +2660,7 @@ def setup_lj_interactions (self, espresso_system, cutoff=None, shift='auto', com cutoff(`float`, optional): cut-off length of the LJ potential. Defaults to None. shift (`string`, optional): If set to `auto` shifts the potential to be continous at `cutoff`. Defaults to `auto`. combining_rule (`string`, optional): combining rule used to calculate `sigma` and `epsilon` for the potential betwen a pair of particles. Defaults to 'Lorentz-Berthelot'. + warning (`bool`): switch to activate/deactivate warning messages. Defaults to True. Note: If no `cutoff` is given, its value is set to 2**(1./6.) in reduced_length units, corresponding to a purely steric potential. @@ -2720,7 +2729,7 @@ def setup_lj_interactions (self, espresso_system, cutoff=None, shift='auto', com self.add_value_to_df(index=index, key=('parameters_of_the_potential',''), new_value=self.json.dumps(lj_params)) - if non_parametrized_labels: + if non_parametrized_labels and warnings: print(f'WARNING: No LJ interaction has been added in ESPResSo for particles with labels: {non_parametrized_labels}. Please, check your pmb.df to ensure if this is your desired setup.') return @@ -2763,5 +2772,3 @@ def write_output_vtf_file(self, espresso_system, filename): for particle in espresso_system.part: coordinates.write (f'{particle.id} \t {particle.pos[0]} \t {particle.pos[1]} \t {particle.pos[2]}\n') return - - diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py new file mode 100644 index 0000000..f9e5457 --- /dev/null +++ b/samples/Beyer2024/create_paper_data.py @@ -0,0 +1,187 @@ +# Import pyMBE and other libraries +import pyMBE +from lib import analysis +import os +import numpy as np +import pandas as pd +import argparse + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +valid_fig_labels=["6a", "6b", "6c"] +valid_modes=["short-run","long-run", "test"] + +parser = argparse.ArgumentParser(description='Script to create the data from Beyer2024') +parser.add_argument('--fig_label', + type=str, + required= True, + help=f'Label of the corresponding figure in Beyer2024, currently supported: {valid_fig_labels}') +parser.add_argument('--mode', + type=str, + default= "long-run", + help='Sets for how long the simulation runs, valid modes are {valid_modes}') +parser.add_argument('--restart', action='store_true', help="Switch to plot the data") +args = parser.parse_args() + +# Inputs +fig_label=args.fig_label +mode=args.mode +plot=args.plot +print(f"plot {args.plot}") +# Sanity checks +if fig_label not in valid_fig_labels: + raise ValueError(f"The figure label {fig_label} is not supported. Supported figure labels are {valid_fig_labels}") + + +if mode not in valid_modes: + raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") + +## Peptide plots (Fig. 6) +labels_fig6=["6a", "6b", "6c"] + +if fig_label in labels_fig6: + script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") + if fig_label == "6a": + sequence="K"*5+"D"*5 + elif fig_label == "6b": + sequence="E"*5+"H"*5 + elif fig_label == "6c": + sequence="nDSHAKRHHGYKRKFHEKHHSHRGYc" + else: + raise RuntimeError() + pH_range = np.linspace(2, 12, num=21) + for pH in pH_range: + run_command=f"python3 {script_path} --sequence {sequence} --pH {pH} --mode {mode}" + print(run_command) + os.system(run_command) + +# Analyze all time series +time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series") +data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path) + +# Store mean values and other statistics +data_path=pmb.get_resource("samples/Beyer2024/")+"data" +if not os.path.exists(data_path): + os.makedirs(data_path) +data.to_csv(f"{data_path}/fig{fig_label}.csv") + +# Plot the data +if plot: + + # Import matplotlib for plotting + import matplotlib.pyplot as plt + import matplotlib as mpl + + plt.rcParams["font.family"] = "serif" + plt.tight_layout() + mpl.rc('axes', linewidth=1) + mpl.rcParams['lines.markersize'] = 5 + mpl.rcParams['lines.linewidth'] = 1.0 + + width = 10 + height = 10 + plt.rc('font', size=22) + plt.figure(figsize=(width,height), dpi=100) + plt.grid(which='major', + color='#CCCCCC', + linestyle='--', + linewidth=0.6) + + # Set labels for the axes + if fig_label in labels_fig6: + plt.ylabel(r"Net charge $Z/e$") + plt.xlabel(r"pH in the solution") + else: + raise RuntimeError() + + # Load pka set + if fig_label in ["6a","6b"]: + pka_path=pmb.get_resource("parameters/pka_sets/CRC1991.txt") + elif fig_label == "6c": + pka_path=pmb.get_resource("parameters/pka_sets/Nozaki1967.txt") + # FIXME: this is only necessary due to an undesired feature in calculate_HH + # that forces to have all particles defined in pyMBE + par_path=pmb.get_resource("parameters/peptides/Blanco2020.txt") + pmb.load_interaction_parameters(par_path) + else: + raise RuntimeError() + pmb.load_pka_set (filename=pka_path) + + # Load ref data + if fig_label == "6a": + ref_path=pmb.get_resource("testsuite/data/Lunkad2021a.csv") + elif fig_label == "6b": + ref_path=pmb.get_resource("testsuite/data/Lunkad2021b.csv") + elif fig_label == "6c": + ref_path=pmb.get_resource("testsuite/data/Blanco2020a.csv") + else: + raise RuntimeError() + + ref_data=analysis.read_csv_file(path=ref_path) + + # Calculate Henderson-Hasselbalch (HH + if fig_label in labels_fig6: + pmb.define_peptide (name=sequence, + sequence=sequence, + model="1beadAA") + + pH_range_HH = np.linspace(2, 12, num=1000) + Z_HH = pmb.calculate_HH(object_name=sequence, + pH_list=pH_range_HH) + + # Plot HH + plt.plot(pH_range_HH, + Z_HH, + label=r"HH", + color="black") + + # Plot Ref data + ref_data=ref_data.sort_values(by="pH",ascending=True) + + if fig_label in ["6a","6b"]: + style={"linestyle":"none", + "marker":"s", + "label":"Lunkad et al.", + "ms":15, + "color":"C0"} + else: + style={"linestyle":"none", + "marker":"^", + "label":"Blanco et al.", + "ms":15, + "color":"green", + "markeredgewidth":1.5} + + plt.errorbar(ref_data["pH"], + ref_data["charge"], + ref_data["charge_error"], + **style) + + # Plot data produced by pyMBE + data=data.astype({("pH","value"): 'float'}).sort_values(by=("pH","value")) + data=data[data.sequence.value == f"{sequence}"] + plt.errorbar(data["pH"]["value"], + data["mean","charge"], + yerr=data["err_mean","charge"], + linestyle="none", + marker="o", + label="pyMBE", + color="C1", + fillstyle="none", + ms=15, + markeredgewidth=1.5) + + # Save plot + fig_path=pmb.get_resource("samples/Beyer2024")+"/figs" + if not os.path.exists(fig_path): + os.makedirs(fig_path) + plt.legend() + plt.savefig(f"{fig_path}/{fig_label}.png", + bbox_inches='tight') + plt.close() + + + + + diff --git a/sample_scripts/globular_protein.py b/samples/Beyer2024/globular_protein.py similarity index 100% rename from sample_scripts/globular_protein.py rename to samples/Beyer2024/globular_protein.py diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py new file mode 100644 index 0000000..00821bb --- /dev/null +++ b/samples/Beyer2024/peptide.py @@ -0,0 +1,227 @@ +# Load espresso, sugar and other necessary libraries +import sys +import os +import inspect +import espressomd +import numpy as np +import pandas as pd +import argparse +import tqdm +from espressomd.io.writer import vtf +from espressomd import interactions +from espressomd import electrostatics + +# Import pyMBE +import pyMBE +from lib import analysis +from lib import handy_functions as hf + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +valid_modes=["short-run","long-run", "test"] +parser = argparse.ArgumentParser(description='Script to run the peptide test cases for pyMBE') +parser.add_argument('--sequence', + type=str, + required= True, + help='sequence of the peptide') +parser.add_argument('--pH', + type=float, + required= True, + help='pH of the solution') +parser.add_argument('--mode', + type=str, + default= "short-run", + help='sets for how long the simulation runs, valid modes are {valid_modes}') +parser.add_argument('--output', + type=str, + required= False, + help='output directory') +parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose") +args = parser.parse_args() + +# Inputs +sequence=args.sequence +pH=args.pH +inputs={"pH": args.pH, + "sequence": args.sequence} +mode=args.mode +verbose=args.no_verbose + +if mode not in valid_modes: + raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") + +SEED = 100 +dt = 0.01 +solvent_permitivity = 78.3 +pep_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L + +# Sanity check +Lunkad_test_sequences=["E"*5+"H"*5,"K"*5+"D"*5] +Blanco_test_sequence=["nDSHAKRHHGYKRKFHEKHHSHRGYc"] + +valid_sequences=Lunkad_test_sequences+Blanco_test_sequence + +if sequence not in valid_sequences: + raise ValueError(f"ERROR: the only valid peptide sequence for this test script are {valid_sequences}") + +if sequence in Lunkad_test_sequences: + pmb.load_interaction_parameters (filename='parameters/peptides/Lunkad2021.txt') + pmb.load_pka_set (filename='parameters/pka_sets/CRC1991.txt') + model = '2beadAA' # Model with 2 beads per each aminoacid + N_peptide_chains = 4 + diameter_Na=0.35*pmb.units.nm + diameter_Cl=0.35*pmb.units.nm + c_salt=1e-2 * pmb.units.mol/ pmb.units.L + chain_length=len(sequence)*2 + +elif sequence in Blanco_test_sequence: + pmb.load_interaction_parameters (pmb.get_resource(path='parameters/peptides/Blanco2020.txt')) + pmb.load_pka_set (pmb.get_resource(path='parameters/pka_sets/Nozaki1967.txt')) + model = '1beadAA' + N_peptide_chains = 1 + c_salt = 5e-3 * pmb.units.mol/ pmb.units.L + diameter_Na=0.2*pmb.units.nm + diameter_Cl=0.36*pmb.units.nm + chain_length=len(sequence) + +# Simulation parameters +if mode == "short-run": + Nsamples = 1000 + MD_steps_per_sample = 1000 +elif mode == "long-run": + Nsamples = 5000 + MD_steps_per_sample = 5000 +elif mode == "test": + Nsamples = 500 + MD_steps_per_sample = 700 + c_salt = 5e-3 * pmb.units.mol/ pmb.units.L + N_peptide_chains = 1 +else: + raise RuntimeError() + + +pmb.define_peptide (name=sequence, sequence=sequence, model=model) + +# Solution parameters +cation_name = 'Na' +anion_name = 'Cl' +c_salt=5e-3 * pmb.units.mol/ pmb.units.L + +pmb.define_particle(name=cation_name, + q=1, + diameter=0.35*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + +pmb.define_particle(name=anion_name, + q=-1, + diameter=0.35*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) + +# System parameters +volume = N_peptide_chains/(pmb.N_A*pep_concentration) +L = volume ** (1./3.) # Side of the simulation box + +# Create an instance of an espresso system +espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) + +# Add all bonds to espresso system +pmb.add_bonds_to_espresso(espresso_system=espresso_system) + +# Create your molecules into the espresso system +pmb.create_pmb_object(name=sequence, + number_of_objects= N_peptide_chains, + espresso_system=espresso_system) + +# Create counterions for the peptide chains +pmb.create_counterions(object_name=sequence, + cation_name=cation_name, + anion_name=anion_name, + espresso_system=espresso_system, + verbose=verbose) + +c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, + cation_name=cation_name, + anion_name=anion_name, + c_salt=c_salt, + verbose=verbose) + +RE, successful_reactions_labels = pmb.setup_cpH(counter_ion=cation_name, + constant_pH=pH, + SEED=SEED) + +if verbose: + print(f"The box length of your system is {L.to('reduced_length')} = {L.to('nm')}") + print(f"The acid-base reaction has been successfully setup for {successful_reactions_labels}") + +# Setup espresso to track the ionization of the acid/basic groups in peptide +type_map =pmb.get_type_map() +espresso_system.setup_type_map(type_list = list(type_map.values())) + +# Setup the non-interacting type for speeding up the sampling of the reactions +non_interacting_type = max(type_map.values())+1 +RE.set_non_interacting_type (type=non_interacting_type) +if verbose: + print(f"The non-interacting type is set to {non_interacting_type}") + +#Setup the potential energy +pmb.setup_lj_interactions (espresso_system=espresso_system, + warnings=verbose) +hf.minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) +hf.setup_electrostatic_interactions(units=pmb.units, + espresso_system=espresso_system, + kT=pmb.kT, + verbose=verbose) +hf.minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) + + +hf.setup_langevin_dynamics(espresso_system=espresso_system, + kT = pmb.kT, + SEED = SEED, + time_step=dt, + tune_skin=False) + +espresso_system.cell_system.skin=0.4 + +# Main loop + +labels_obs=["time","charge","rg"] +time_series={} + +for label in labels_obs: + time_series[label]=[] + +for sample in tqdm.trange(Nsamples,disable=not verbose): + # Run LD + espresso_system.integrator.run(steps=MD_steps_per_sample) + # Run MC + RE.reaction(reaction_steps=len(sequence)) + # Sample observables + charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, + molecule_name=sequence) + + Rg = espresso_system.analysis.calc_rg(chain_start=0, + number_of_chains=N_peptide_chains, + chain_length=chain_length) + # Store observables + time_series["time"].append(espresso_system.time) + time_series["charge"].append(charge_dict["mean"]) + time_series["rg"].append(Rg[0]) + +data_path = args.output +if data_path is None: + data_path=pmb.get_resource(path="samples/Beyer2024")+"/time_series" + +if not os.path.exists(data_path): + os.makedirs(data_path) + +time_series=pd.DataFrame(time_series) +filename=analysis.built_output_name(input_dict=inputs) + +time_series.to_csv(f"{data_path}/{filename}_time_series.csv", index=False) + +if verbose: + print("*** DONE ***") + diff --git a/tests/plotting_script_grxmc.py b/samples/Beyer2024/plotting_script_grxmc.py similarity index 100% rename from tests/plotting_script_grxmc.py rename to samples/Beyer2024/plotting_script_grxmc.py diff --git a/tests/run_test_protein.py b/samples/Beyer2024/run_test_protein.py similarity index 100% rename from tests/run_test_protein.py rename to samples/Beyer2024/run_test_protein.py diff --git a/tests/simulation_script_grxmc.py b/samples/Beyer2024/simulation_script_grxmc.py similarity index 100% rename from tests/simulation_script_grxmc.py rename to samples/Beyer2024/simulation_script_grxmc.py diff --git a/sample_scripts/branched_polyampholyte.py b/samples/branched_polyampholyte.py similarity index 100% rename from sample_scripts/branched_polyampholyte.py rename to samples/branched_polyampholyte.py diff --git a/sample_scripts/peptide.py b/samples/peptide.py similarity index 100% rename from sample_scripts/peptide.py rename to samples/peptide.py diff --git a/sample_scripts/peptide_mixture_grxmc_ideal.py b/samples/peptide_mixture_grxmc_ideal.py similarity index 100% rename from sample_scripts/peptide_mixture_grxmc_ideal.py rename to samples/peptide_mixture_grxmc_ideal.py diff --git a/sample_scripts/peptide_mixture_grxmc_unified_ideal.py b/samples/peptide_mixture_grxmc_unified_ideal.py similarity index 100% rename from sample_scripts/peptide_mixture_grxmc_unified_ideal.py rename to samples/peptide_mixture_grxmc_unified_ideal.py diff --git a/handy_scripts/plot_HH.py b/samples/plot_HH.py similarity index 100% rename from handy_scripts/plot_HH.py rename to samples/plot_HH.py diff --git a/tests/GLU_HIS_peptide.py b/tests/GLU_HIS_peptide.py deleted file mode 100644 index 5641ff3..0000000 --- a/tests/GLU_HIS_peptide.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -Script that simulates a dilute solution of GlU5-HIS5 peptide -It calculates the peptide average charge and radius of gyration (using a constant pH simulation) -All parameters are taken from Ref. 1, whose results are here reproduced as reference. -This script is part of pyMBE library and it is meant to serve as reference for it. - -Authors: Dr. Pablo M. Blanco (Charles University) - -[1] Lunkad, R., Murmiliuk, A., Hebbeker, P., Boublík, M., Tošner, Z., Štěpánek, M., & Košovan, P. -Quantitative prediction of charge regulation in oligopeptides. -Molecular Systems Design & Engineering, 2021, 6(2), 122-131. - -""" -# Load espresso, pyMBE and other necessary libraries -import os -import sys -import inspect -import numpy as np -import pandas as pd -from tqdm import tqdm -import matplotlib.pyplot as plt -from matplotlib.style import use -import espressomd -from espressomd import interactions -from espressomd.io.writer import vtf -from espressomd import electrostatics - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library() - -# Load some functions from the handy_scripts library for convinience -from handy_scripts.handy_functions import setup_electrostatic_interactions -from handy_scripts.handy_functions import minimize_espresso_system_energy -from handy_scripts.handy_functions import setup_langevin_dynamics -from handy_scripts.handy_functions import block_analyze - -# The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' -if not os.path.exists('./frames'): - os.makedirs('./frames') - -# Simulation parameters - -pH_range = np.linspace(2, 12, num=21) -Samples_per_pH = 1000 -MD_steps_per_sample = 1000 -steps_eq =int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction = 0.5 -SEED = 1 -dt = 0.01 -solvent_permitivity = 78.3 - -L = 25.513*pmb.units.nm - -# Peptide parameters -N_aminoacids = 5 -sequence = "E"*N_aminoacids+"H"*N_aminoacids -model ='2beadAA' # Model with 2 beads per each aminoacid -pep_concentration = 1e-4 *pmb.units.mol/pmb.units.L -residue_positions = [0,3,5,len(sequence)-1] # Residue positions to calculate its average charge - -# Salt parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt=1e-2 * pmb.units.mol/ pmb.units.L - -pmb.define_particle( name=cation_name, q=1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle( name=anion_name, q=-1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - -# Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. -path_to_interactions=pmb.get_resource("reference_parameters/interaction_parameters/Lunkad2021.txt") -path_to_pka=pmb.get_resource("reference_parameters/pka_sets/CRC1991.txt") -pmb.load_interaction_parameters (filename=path_to_interactions) -pmb.load_pka_set (path_to_pka) - -# Create a molecule entry on the pyMBE dataframe -pmb.define_peptide (name=sequence, sequence=sequence, model=model) - -# System parameters -volume = L**3 -N_peptide_chains = int ( volume * pmb.N_A * pep_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) - -# Create an instance of an espresso system -espresso_system = espressomd.System(box_l=[L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - -# Create your molecules into the espresso system -pmb.create_pmb_object (name=sequence, number_of_objects= N_peptide_chains,espresso_system=espresso_system, use_default_bond=True) - -# Create counterions for the peptide chains -pmb.create_counterions(object_name=sequence,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) -c_salt_calculated=pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) - -#List of ionisible groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisible_groups = basic_groups + acidic_groups -total_ionisible_groups = len (list_ionisible_groups) - -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) - -# Setup the acid-base reactions of the peptide using the constant pH ensemble -RE, sucessfull_reactions_labels=pmb.setup_cpH (counter_ion=cation_name, constant_pH=2, SEED = SEED) -print('The acid-base reaction has been sucessfully setup for ', sucessfull_reactions_labels) - -# Setup espresso to track the each type defined in type_map -type_map =pmb.get_type_map() -types = list (type_map.values()) -espresso_system.setup_type_map( type_list = types) - -# Setup the non-interacting type for speeding up the sampling of the reactions -non_interacting_type = max(type_map.values())+1 -RE.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) - -#Setup the potential energy -pmb.setup_lj_interactions (espresso_system=espresso_system) -minimize_espresso_system_energy (espresso_system=espresso_system) -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -minimize_espresso_system_energy (espresso_system=espresso_system) - - -setup_langevin_dynamics (espresso_system=espresso_system, - kT = pmb.kT, - SEED = SEED, - time_step=dt, - tune_skin=False) - -espresso_system.cell_system.skin=0.4 - -# Write the initial state -with open('frames/trajectory1.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -N_frame=0 -Z_pH=[] # List of the average global charge at each pH -Rg_pH=[] - -particle_id_list = pmb.get_particle_id_map(object_name=sequence)["all"] -first_peptide_id = min(particle_id_list) - -#Save the pyMBE dataframe in a CSV file -pmb.df.to_csv('df.csv') - -for index in tqdm(range(len(pH_range))): - # Sample list inicialization - pH_value=pH_range[index] - print (pH_value) - Z_sim=[] - Rg_sim=[] - Z_groups_time_series=[] - RE.constant_pH = pH_value - - # Inner loop for sampling each pH value - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - RE.reaction(reaction_steps=total_ionisible_groups) - - if ( step > steps_eq): - # Get peptide net charge - charge_dict=pmb.calculate_net_charge (espresso_system=espresso_system, - molecule_name=sequence) - Z_sim.append(charge_dict["mean"]) - # Get peptide radius of gyration - Rg = espresso_system.analysis.calc_rg(chain_start=first_peptide_id, number_of_chains=N_peptide_chains, chain_length=len(particle_id_list)) - Rg_value = pmb.units.Quantity(Rg[0], 'reduced_length') - Rg_nm = Rg_value.to('nm').magnitude - Rg_sim.append(Rg_nm) - - if (step % N_samples_print == 0) : - - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - - Z_pH.append(np.array(Z_sim)) - Rg_pH.append(Rg_sim) - - print("pH = {:6.4g} done".format(pH_value)) - -print("Net charge analysis") -av_charge, err_charge, tau_charge, block_size = block_analyze(input_data=pmb.np.array(Z_pH)) - -print("Rg analysis") -av_rg, err_rg, tau_rg, block_size = block_analyze(input_data=Rg_pH) - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation -Z_HH = pmb.calculate_HH(object_name=sequence, - pH_list=pH_range) - -# Load the reference data -path_to_ref=pmb.get_resource("reference_data") -reference_data = pd.read_csv(f"{path_to_ref}/Glu-HisMSDE.csv") - - -Z_ref = N_aminoacids*-1*reference_data['aaa']+N_aminoacids*reference_data['aab'] -Rg_ref = reference_data['arg']*0.37 - -# Plot the results - -plt.figure(figsize=[11, 9]) -plt.subplot(1, 2, 1) -plt.suptitle('Peptide sequence: '+''.join(pmb.protein_sequence_parser(sequence=sequence))) -plt.errorbar(pH_range, av_charge, yerr=err_charge, fmt = '-o', capsize=3, label='Simulation') -plt.plot(pH_range, Z_ref, '-o', color = 'b', label='Lunkad2021') -plt.plot(pH_range, Z_HH, "-k", label='Henderson-Hasselbach') -plt.axhline(y=0.0, color="gray", linestyle="--") -plt.xlabel('pH') -plt.ylabel('Net charge / e') -plt.legend() - -plt.subplot(1, 2, 2) -plt.errorbar(pH_range, av_rg, yerr=err_rg, fmt = '-o', capsize=3, label='Simulation') -plt.plot(pH_range, Rg_ref, '-o', color='b', label='Lunkad2021') -plt.xlabel('pH') -plt.ylabel('Radius of gyration / nm') -plt.legend() - -plt.show() diff --git a/tests/LYS_ASP_peptide.py b/tests/LYS_ASP_peptide.py deleted file mode 100644 index 791febd..0000000 --- a/tests/LYS_ASP_peptide.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -Script that simulates a dilute solution of GlU5-HIS5 peptide -It calculates the peptide average charge and radius of gyration (using a constant pH simulation) -All parameters are taken from Ref. 1, whose results are here reproduced as reference. -This script is part of pyMBE library and it is meant to serve as reference for it. - -Authors: Dr. Pablo M. Blanco (Charles University) - -[1] Lunkad, R., Murmiliuk, A., Hebbeker, P., Boublík, M., Tošner, Z., Štěpánek, M., & Košovan, P. -Quantitative prediction of charge regulation in oligopeptides. -Molecular Systems Design & Engineering, 2021, 6(2), 122-131. - -""" -# Load espresso, pyMBE and other necessary libraries -import os -import sys -import inspect -import numpy as np -import pandas as pd -from tqdm import tqdm -import matplotlib.pyplot as plt -from matplotlib.style import use -import espressomd -from espressomd import interactions -from espressomd.io.writer import vtf -from espressomd import electrostatics - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library() - -# Load some functions from the handy_scripts library for convinience -from handy_scripts.handy_functions import setup_electrostatic_interactions -from handy_scripts.handy_functions import minimize_espresso_system_energy -from handy_scripts.handy_functions import setup_langevin_dynamics -from handy_scripts.handy_functions import block_analyze - -# The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' -if not os.path.exists('./frames'): - os.makedirs('./frames') - -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_range = np.linspace(2, 12, num=21) -Samples_per_pH = 100 -MD_steps_per_sample = 100 -steps_eq =int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction = 0.5 -SEED = 1 -dt = 0.01 -solvent_permitivity = 78.3 - -L = 25.513*pmb.units.nm - -# Peptide parameters -N_aminoacids = 5 -sequence = 'K'*N_aminoacids+'D'*N_aminoacids -model = '2beadAA' # Model with 2 beads per each aminoacid -pep_concentration = 1e-4 *pmb.units.mol/pmb.units.L - -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt = 1e-2 * pmb.units.mol/ pmb.units.L - -# Define salt parameters - -pmb.define_particle( name=cation_name, q=1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle( name=anion_name, q=-1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - -# Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. -path_to_interactions=pmb.get_resource("reference_parameters/interaction_parameters/Lunkad2021.txt") -path_to_pka=pmb.get_resource("reference_parameters/pka_sets/CRC1991.txt") -pmb.load_interaction_parameters (filename=path_to_interactions) -pmb.load_pka_set (path_to_pka) - -# Define the peptide on the pyMBE dataframe -pmb.define_peptide( name=sequence, sequence=sequence, model=model) - -# System parameters -volume = L**3 -N_peptide_chains = int ( volume * pmb.N_A * pep_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) - -# Create an instance of an espresso system -espresso_system = espressomd.System(box_l=[L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso (espresso_system=espresso_system) - -# Create your molecules into the espresso system - -pmb.create_pmb_object (name=sequence, number_of_objects= N_peptide_chains,espresso_system=espresso_system, use_default_bond=True) - - -# Create counterions for the peptide chains -pmb.create_counterions (object_name=sequence,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) - - -#List of ionisible groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisible_groups = basic_groups + acidic_groups -total_ionisible_groups = len (list_ionisible_groups) - -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) - -# Setup the acid-base reactions of the peptide using the constant pH ensemble -RE, sucessfull_reactions_labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=2, SEED = SEED) -print('The acid-base reaction has been sucessfully setup for ', sucessfull_reactions_labels) - -# Setup espresso to track the each type defined in type_map -type_map = pmb.get_type_map() -types = list (type_map.values()) -espresso_system.setup_type_map( type_list = types) - -# Setup the non-interacting type for speeding up the sampling of the reactions -non_interacting_type = max(type_map.values())+1 -RE.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) - -# Setup the potential energy -pmb.setup_lj_interactions(espresso_system=espresso_system) -minimize_espresso_system_energy (espresso_system=espresso_system) -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -minimize_espresso_system_energy (espresso_system=espresso_system) - - -setup_langevin_dynamics (espresso_system=espresso_system, - kT = pmb.kT, - SEED = SEED, - time_step=dt, - tune_skin=False) - -espresso_system.cell_system.skin=0.4 - -# Save the initial state -with open('frames/trajectory1.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -N_frame=0 -Z_pH=[] # List of the average global charge at each pH -Rg_pH=[] - -particle_id_list = pmb.get_particle_id_map(object_name=sequence)["all"] -first_peptide_id = min(particle_id_list) - -#Save the pyMBE dataframe in a CSV file -pmb.df.to_csv('df.csv') - -for index in tqdm(range(len(pH_range))): - # Sample list inicialization - pH_value=pH_range[index] - print (pH_value) - Z_sim=[] - Rg_sim=[] - Z_groups_time_series=[] - RE.constant_pH = pH_value - - # Inner loop for sampling each pH value - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - RE.reaction(reaction_steps=total_ionisible_groups) - - if ( step > steps_eq): - # Get peptide net charge - charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=sequence) - Z_sim.append(charge_dict["mean"]) - # Get peptide radius of gyration - Rg = espresso_system.analysis.calc_rg(chain_start=first_peptide_id, number_of_chains=N_peptide_chains, chain_length=len(particle_id_list)) - Rg_value = pmb.units.Quantity(Rg[0], 'reduced_length') - Rg_nm = Rg_value.to('nm').magnitude - Rg_sim.append(Rg_nm) - - if (step % N_samples_print == 0) : - - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - - Z_pH.append(np.array(Z_sim)) - Rg_pH.append(Rg_sim) - - print("pH = {:6.4g} done".format(pH_value)) - -print("Net charge analysis") -av_charge, err_charge, tau_charge, block_size = block_analyze(input_data=pmb.np.array(Z_pH)) - -print("Rg analysis") -av_rg, err_rg, tau_rg, block_size = block_analyze(input_data=Rg_pH) - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation -Z_HH = pmb.calculate_HH(object_name=sequence, - pH_list=pH_range) - -# Load the reference data -path_to_ref=pmb.get_resource("reference_data") -reference_data = pd.read_csv(f"{path_to_ref}/Lys-AspMSDE.csv") - - -Z_ref = N_aminoacids*-1*reference_data['aaa']+N_aminoacids*reference_data['aab'] -Rg_ref = reference_data['arg']*0.37 - -# Plot the results - -plt.figure(figsize=[11, 9]) -plt.subplot(1, 2, 1) -plt.suptitle('Peptide sequence: '+''.join(pmb.protein_sequence_parser(sequence=sequence))) -plt.errorbar(pH_range, av_charge, yerr=err_charge, fmt = '-o', capsize=3, label='Simulation') -plt.plot(pH_range, Z_ref, '-o', color = 'b', label='Lunkad2021') -plt.plot(pH_range, Z_HH, "-k", label='Henderson-Hasselbach') -plt.axhline(y=0.0, color="gray", linestyle="--") -plt.xlabel('pH') -plt.ylabel('Net charge / e') -plt.legend() - -plt.subplot(1, 2, 2) -plt.errorbar(pH_range, av_rg, yerr=err_rg, fmt = '-o', capsize=3, label='Simulation') -plt.plot(pH_range, Rg_ref, '-o', color='b', label='Lunkad2021') -plt.xlabel('pH') -plt.ylabel('Radius of gyration / nm') -plt.legend() - -plt.show() diff --git a/tests/histatin5_peptide.py b/tests/histatin5_peptide.py deleted file mode 100644 index 338285e..0000000 --- a/tests/histatin5_peptide.py +++ /dev/null @@ -1,262 +0,0 @@ -""" -Script that simulates a dilute solution of histatin-5 peptide at very low salt concentration. -It calculates the peptide average charge and radius of gyration (using a constant pH simulation) -All parameters are taken from Ref. 1, whose results are here reproduced as reference. -This script is part of Sugar library and it is meant to serve as reference for it. - -Authors: Dr. Pablo M. Blanco (Charles University) and MsC. Albert Martinez (Royal College of Surgeons in Ireland) - -[1] Blanco, P. M., Madurga, S., Garcés, J. L., Mas, F., & Dias, R. S. - Influence of macromolecular crowding on the charge regulation of intrinsically disordered proteins. - Soft Matter, 17(3), 655-669,2021. - -""" -# Load espresso, pyMBE and other necessary libraries -import os -import sys -import inspect -import numpy as np -import pandas as pd -from tqdm import tqdm -import matplotlib.pyplot as plt -from matplotlib.style import use -import espressomd -from espressomd import interactions -from espressomd.io.writer import vtf -from espressomd import electrostatics - -# Load some functions from the handy_scripts library for convinience -from handy_scripts.handy_functions import setup_electrostatic_interactions -from handy_scripts.handy_functions import minimize_espresso_system_energy -from handy_scripts.handy_functions import setup_langevin_dynamics -from handy_scripts.handy_functions import block_analyze - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library() - -# The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' -if not os.path.exists('./frames'): - os.makedirs('./frames') - -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_range = np.linspace(2, 12, num=21) -Samples_per_pH = 1000 -MD_steps_per_sample = 1000 -steps_eq = int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction = 0.5 -SEED = 100 -dt = 0.001 -solvent_permitivity = 78.3 - -L = 22 * pmb.units.nm # Side of the simulation box - -# Peptide parameters -peptide_name = 'histidin5' -sequence='NH2-ASP-SER-HIS-ALA-LYS-ARG-HIS-HIS-GLY-TYR-LYS-ARG-LYS-PHE-HIS-GLU-LYS-HIS-HIS-SER-HIS-ARG-GLY-TYR-COOH' -model = '1beadAA' -calculated_peptide_concentration = 1.56e-4 *pmb.units.mol/pmb.units.L -bead_size = 0.4*pmb.units.nm -epsilon = 1*pmb.units('reduced_energy') - -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt = 5e-3 * pmb.units.mol/ pmb.units.L - -pmb.define_particle(name=cation_name, q=1, diameter=0.2*pmb.units.nm, epsilon=epsilon) -pmb.define_particle(name=anion_name, q=-1, diameter=0.36*pmb.units.nm, epsilon=epsilon) - -# use generic parameters for all beads in the peptide -acidic_aminoacids = ['c','E','D','Y','C'] -basic_aminoacids = ['R','n','K','H'] -N_aminoacids = len (pmb.protein_sequence_parser(sequence=sequence)) - -# Load pKa set -path_to_pka=pmb.get_resource("reference_parameters/pka_sets/Nozaki1967.txt") -pmb.load_pka_set (path_to_pka) - - -already_defined_AA=[] -for aminoacid_key in pmb.protein_sequence_parser(sequence=sequence): - if aminoacid_key in already_defined_AA: - continue - if aminoacid_key in acidic_aminoacids: - pmb.define_particle (name=aminoacid_key, acidity='acidic', diameter=bead_size, epsilon=epsilon) - elif aminoacid_key in basic_aminoacids: - pmb.define_particle (name=aminoacid_key, acidity='basic', diameter=bead_size, epsilon=epsilon) - else: - pmb.define_particle (name=aminoacid_key, q=0, diameter=bead_size, epsilon=epsilon) - already_defined_AA.append(aminoacid_key) - -generic_bond_lenght=0.4 * pmb.units.nm -generic_harmonic_constant=0.41 * pmb.units.N / pmb.units.m -generic_bond = interactions.HarmonicBond(k=generic_harmonic_constant.to('reduced_energy / reduced_length**2').magnitude, - r_0=generic_bond_lenght.to('reduced_length').magnitude) - -pmb.define_default_bond(bond_object=generic_bond, bond_type="harmonic") - -# Define the peptide in the pyMBE data frame - -pmb.define_peptide (name=peptide_name, sequence=sequence, model=model) - -# Create an instance of an espresso system - -espresso_system = espressomd.System(box_l=[L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso(espresso_system=espresso_system) - -# Create your molecules into the espresso system - -N_peptide_chains = int(L**3*calculated_peptide_concentration*pmb.N_A) -pmb.create_pmb_object (name=peptide_name, number_of_objects= N_peptide_chains, espresso_system=espresso_system, use_default_bond=True, position = [[L.to('reduced_length').magnitude/2]*3]) - -with open('frames/trajectory0.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -# Create counterions for the peptide chains -pmb.create_counterions(object_name=peptide_name,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) - -#List of ionisible groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.drop_duplicates().to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.drop_duplicates().to_list() -list_ionisible_groups = basic_groups + acidic_groups -total_ionisible_groups = len (list_ionisible_groups) - -print('The box length of your system is', L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) - -# Setup the acid-base reactions of the peptide using the constant pH ensemble -RE, sucessfull_reactions_labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=2, SEED = SEED ) -print('The acid-base reaction has been sucessfully setup for ', sucessfull_reactions_labels) - -type_map =pmb.get_type_map() -types = list (type_map.values()) -espresso_system.setup_type_map( type_list = types) - -# Setup the non-interacting type for speeding up the sampling of the reactions -non_interacting_type = max(type_map.values())+1 -RE.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) - -#Setup the potential energy -pmb.setup_lj_interactions(espresso_system=espresso_system) -minimize_espresso_system_energy (espresso_system=espresso_system) -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -minimize_espresso_system_energy (espresso_system=espresso_system) - - -setup_langevin_dynamics(espresso_system=espresso_system, - kT = pmb.kT, - SEED = SEED, - time_step=dt, - tune_skin=False) - -espresso_system.cell_system.skin=0.4 - -#Save the initial state -with open('frames/trajectory1.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -Rg_pH=[] -Z_pH=[] -N_frame=0 - -particle_id_list = pmb.get_particle_id_map(object_name=peptide_name)["all"] -first_peptide_id = min(particle_id_list) - -#Save thepyMBE dataframe in a CSV file -pmb.df.to_csv('df.csv') - -# Main loop for performing simulations at different pH-values - -for index in tqdm(range(len(pH_range))): - - # Sample list inicialization - pH_value=pH_range[index] - print (pH_value) - Z_sim=[] - Rg_sim=[] - RE.constant_pH = pH_value - - # Inner loop for sampling each pH value - for step in range(Samples_per_pH+steps_eq): - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - RE.reaction(reaction_steps=total_ionisible_groups) - - if ( step > steps_eq): - # Get peptide net charge - charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=peptide_name) - Z_sim.append(charge_dict["mean"]) - - Rg = espresso_system.analysis.calc_rg(chain_start=first_peptide_id, number_of_chains=N_peptide_chains, chain_length=len(particle_id_list)) - Rg_value = pmb.units.Quantity(Rg[0], 'reduced_length') - Rg_nm = Rg_value.to('nm').magnitude - Rg_sim.append(Rg_nm) - - if (step % N_samples_print == 0) : - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - Z_pH.append(Z_sim) - Rg_pH.append(Rg_sim) - print("pH = {:6.4g} done".format(pH_value)) - -# Estimate the statistical error and the autocorrelation time of the data - -print("Net charge analysis") -av_charge, err_charge, tau_charge, block_size = block_analyze(input_data=pmb.np.array(Z_pH)) - -print("Rg analysis") -av_rg, err_rg, tau_rg, block_size = block_analyze(input_data=Rg_pH) - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation - -Z_HH = pmb.calculate_HH(object_name=peptide_name, - pH_list=pH_range) - -# Load the reference data -path_to_ref=pmb.get_resource("reference_data") -reference_data = np.loadtxt(f"{path_to_ref}/histatin5_SoftMatter.txt", delimiter=",") - -Z_ref=reference_data[:,1] -Z_err_ref=reference_data[:,2] - -Rg_ref=reference_data[:,4]/10 -Rg_err_ref=reference_data[:,5]/10 - -# Plot the results - -plt.figure(figsize=[11, 9]) -plt.subplot(1, 2, 1) -plt.suptitle('Peptide sequence: '+''.join(pmb.protein_sequence_parser(sequence=sequence))) -plt.errorbar(pH_range, av_charge, yerr=err_charge, fmt = '-o', capsize=3, label='Simulation') -plt.errorbar(pH_range, Z_ref, yerr=Z_err_ref, color='b', fmt = '-o', ecolor = 'b', capsize=3, label='Blanco2021') -plt.plot(pH_range, Z_HH, "-k", label='Henderson-Hasselbach') -plt.axhline(y=0.0, color="gray", linestyle="--") -plt.xlabel('pH') -plt.ylabel('Charge of Histatin-5 / e') -plt.legend() - -plt.subplot(1, 2, 2) -plt.errorbar(pH_range, av_rg, yerr=err_rg, fmt = '-o', capsize=3, label='Simulation') -plt.errorbar(pH_range, Rg_ref, yerr=Rg_err_ref, color='b', fmt = '-o', ecolor = 'b', capsize=3, label='Blanco2021') -plt.xlabel('pH') -plt.ylabel('Radius of gyration of Histatin-5 / nm') -plt.legend() - -plt.show() diff --git a/testsuite/LYS_ASP_peptide.py b/testsuite/LYS_ASP_peptide.py deleted file mode 100644 index 3389e2e..0000000 --- a/testsuite/LYS_ASP_peptide.py +++ /dev/null @@ -1,199 +0,0 @@ -# Load espresso, pyMBE and other necessary libraries -import os -import sys -import inspect -import numpy as np -import pandas as pd -from tqdm import tqdm -import espressomd -from espressomd import interactions -from espressomd.io.writer import vtf -from espressomd import electrostatics - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library() - -# Load some functions from the handy_scripts library for convinience -from handy_scripts.handy_functions import setup_electrostatic_interactions -from handy_scripts.handy_functions import minimize_espresso_system_energy -from handy_scripts.handy_functions import setup_langevin_dynamics -from handy_scripts.handy_functions import block_analyze - -# The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' -if not os.path.exists('./frames'): - os.makedirs('./frames') - -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_range = np.linspace(2, 12, num=21) -Samples_per_pH = 36 -MD_steps_per_sample = 50 -steps_eq =int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction = 0.5 -SEED = 1 -dt = 0.01 -solvent_permitivity = 78.3 - -L = 25.513*pmb.units.nm - -# Peptide parameters -N_aminoacids = 5 -sequence = 'K'*N_aminoacids+'D'*N_aminoacids -model = '2beadAA' # Model with 2 beads per each aminoacid -pep_concentration = 1e-4 *pmb.units.mol/pmb.units.L - -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt = 1e-2 * pmb.units.mol/ pmb.units.L - -# Define salt parameters - -pmb.define_particle( name=cation_name, q=1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle( name=anion_name, q=-1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - -# Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. - -pmb.load_interaction_parameters (filename=pmb.get_resource('reference_parameters/interaction_parameters/Lunkad2021.txt')) -pmb.load_pka_set (filename=pmb.get_resource('reference_parameters/pka_sets/CRC1991.txt')) - -# Define the peptide on the pyMBE dataframe -pmb.define_peptide( name=sequence, sequence=sequence, model=model) - -# System parameters -volume = L**3 -N_peptide_chains = int ( volume * pmb.N_A * pep_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) - -# Create an instance of an espresso system -espresso_system = espressomd.System(box_l=[L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso (espresso_system=espresso_system) - -# Create your molecules into the espresso system - -pmb.create_pmb_object (name=sequence, number_of_objects= N_peptide_chains,espresso_system=espresso_system, use_default_bond=True) - - -# Create counterions for the peptide chains -pmb.create_counterions (object_name=sequence,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) - - -#List of ionisible groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisible_groups = basic_groups + acidic_groups -total_ionisible_groups = len (list_ionisible_groups) - -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) - -# Setup the acid-base reactions of the peptide using the constant pH ensemble -RE, sucessfull_reactions_labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=2, SEED = SEED) -print('The acid-base reaction has been sucessfully setup for ', sucessfull_reactions_labels) - -# Setup espresso to track the each type defined in type_map -type_map = pmb.get_type_map() -types = list (type_map.values()) -espresso_system.setup_type_map( type_list = types) - -# Setup the non-interacting type for speeding up the sampling of the reactions -non_interacting_type = max(type_map.values())+1 -RE.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) - -# Setup the potential energy -pmb.setup_lj_interactions(espresso_system=espresso_system) -minimize_espresso_system_energy (espresso_system=espresso_system) -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -minimize_espresso_system_energy (espresso_system=espresso_system) - - -setup_langevin_dynamics (espresso_system=espresso_system, - kT = pmb.kT, - SEED = SEED, - time_step=dt, - tune_skin=False) - -espresso_system.cell_system.skin=0.4 - -# Save the initial state -with open('frames/trajectory1.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -N_frame=0 -Z_pH=[] # List of the average global charge at each pH -Rg_pH=[] - -particle_id_list = pmb.get_particle_id_map(object_name=sequence)["all"] -first_peptide_id = min(particle_id_list) - -#Save the pyMBE dataframe in a CSV file -pmb.df.to_csv('df.csv') - -for index in (pbar := tqdm(range(len(pH_range)))): - # Sample list inicialization - pH_value=pH_range[index] - Z_sim=[] - Rg_sim=[] - Z_groups_time_series=[] - RE.constant_pH = pH_value - pbar.set_description(f"pH = {pH_value:2.1f}") - - # Inner loop for sampling each pH value - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - RE.reaction(reaction_steps=total_ionisible_groups) - - if ( step > steps_eq): - # Get peptide net charge - charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=sequence) - Z_sim.append(charge_dict["mean"]) - # Get peptide radius of gyration - Rg = espresso_system.analysis.calc_rg(chain_start=first_peptide_id, number_of_chains=N_peptide_chains, chain_length=len(particle_id_list)) - Rg_value = pmb.units.Quantity(Rg[0], 'reduced_length') - Rg_nm = Rg_value.to('nm').magnitude - Rg_sim.append(Rg_nm) - - if (step % N_samples_print == 0) : - - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - - Z_pH.append(np.array(Z_sim)) - Rg_pH.append(Rg_sim) - -print("Net charge analysis") -av_charge, err_charge, tau_charge, block_size = block_analyze(input_data=pmb.np.array(Z_pH)) - -print("Rg analysis") -av_rg, err_rg, tau_rg, block_size = block_analyze(input_data=Rg_pH) - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation -Z_HH = pmb.calculate_HH(object_name=sequence, - pH_list=pH_range) - -# Load the reference data -reference_file_Path = pmb.get_resource("reference_data/Lys-AspMSDE.csv") -reference_data = pd.read_csv(reference_file_Path) - -Z_ref = N_aminoacids*-1*reference_data['aaa']+N_aminoacids*reference_data['aab'] -Rg_ref = reference_data['arg']*0.37 - -np.testing.assert_allclose(np.copy(av_charge), Z_ref.to_numpy(), atol=2.5, rtol=0.) diff --git a/testsuite/data/Blanco2020a.csv b/testsuite/data/Blanco2020a.csv new file mode 100644 index 0000000..847b6fa --- /dev/null +++ b/testsuite/data/Blanco2020a.csv @@ -0,0 +1,22 @@ +pH,charge,charge_error +2.0,13.674,0.00692820323028 +2.5,12.891,0.00640312423743 +3.0,12.266,0.00469041575982 +3.5,11.788,0.00761577310586 +4.0,11.169,0.00721110255093 +4.5,10.167,0.00721110255093 +5.0,8.853,0.00714142842854 +5.5,7.447,0.00707106781187 +6.0,6.252,0.0636631761696 +6.5,5.416,0.0637024332345 +7.0,4.837,0.0637024332345 +7.5,4.278,0.0895321171424 +8.0,3.69,0.0895823643358 +8.5,3.112,0.089554452709 +9.0,2.501,0.089554452709 +9.5,1.729,0.0895377015564 +10.0,0.826,0.0895879456177 +10.5,-0.115,0.0634428877022 +11.0,-0.946,0.0634428877022 +11.5,-1.654,0.0634428877022 +12.0,-2.275,0.00721110255093 diff --git a/testsuite/data/Lunkad2021a.csv b/testsuite/data/Lunkad2021a.csv new file mode 100644 index 0000000..120f0c4 --- /dev/null +++ b/testsuite/data/Lunkad2021a.csv @@ -0,0 +1,22 @@ +pH,charge,charge_error +2.0,4.328030899613755,0.0538962451412445 +2.5,3.676594042574468,0.06369778710295225 +3.0,2.846670666616667,0.0691114121594015 +3.5,1.9688966387920153,0.06892040676649949 +4.0,1.1622067224159691,0.06312241346943875 +4.5,0.5701116236047055,0.05065347517351694 +5.0,0.22044849439381942,0.03501732616728689 +5.5,0.07649279384007635,0.021428102306625365 +6.0,0.024360945488182573,0.01246182548157354 +6.5,0.0072299096261296825,0.007105971896087083 +7.0,0.0009874876564035517,0.004790283141830334 +7.5,-0.003301208734892036,0.005269950644776093 +8.0,-0.012316096048799352,0.009043092692059515 +8.5,-0.04268946638167215,0.016169568639180957 +9.0,-0.12507093661329272,0.02716407000198067 +9.5,-0.3434894563817936,0.04259887933043685 +10.0,-0.8331858351770594,0.05840629764257237 +10.5,-1.612156098048775,0.06884781788167588 +11.0,-2.5490993862576716,0.07198577122196975 +11.5,-3.48038649516881,0.06908347786356787 +12.0,-4.236439544505694,0.05791528997068287 diff --git a/testsuite/data/Lunkad2021b.csv b/testsuite/data/Lunkad2021b.csv new file mode 100644 index 0000000..804c0c5 --- /dev/null +++ b/testsuite/data/Lunkad2021b.csv @@ -0,0 +1,22 @@ +pH,charge,charge_error +2.0,4.769976625292183,0.03589861247790809 +2.5,4.403126210922363,0.05183214139258131 +3.0,3.767890401369984,0.06473042658758646 +3.5,2.910399870001626,0.07297264097441067 +4.0,1.9509056136798293,0.076159435380892 +4.5,1.01215734803315,0.07623737867900791 +5.0,0.1300896238797007,0.07517492641612307 +5.5,-0.7458756765540429,0.07613015058623873 +6.0,-1.6825464681691473,0.07653887855278552 +6.5,-2.684907688653892,0.07382705658513389 +7.0,-3.5904288696391298,0.0687849849211043 +7.5,-4.312922338470769,0.05559908936108033 +8.0,-4.725088436394546,0.0387987480746436 +8.5,-4.9062336720791,0.02385030140762049 +9.0,-4.967755403057463,0.013989601304319406 +9.5,-4.989578880263996,0.008034723183410933 +10.0,-4.997462531718353,0.004017910847679971 +10.5,-4.998956263046711,0.0025471341039558855 +11.0,-4.999727503406207,0.001303259008174 +11.5,-4.999813752328096,0.0010779120839355 +12.0,-5.0,0.0 diff --git a/reference_data/1beb-10mM-torres.dat b/testsuite/data/src/1beb-10mM-torres.dat similarity index 100% rename from reference_data/1beb-10mM-torres.dat rename to testsuite/data/src/1beb-10mM-torres.dat diff --git a/reference_data/1f6s-10mM-torres.dat b/testsuite/data/src/1f6s-10mM-torres.dat similarity index 100% rename from reference_data/1f6s-10mM-torres.dat rename to testsuite/data/src/1f6s-10mM-torres.dat diff --git a/reference_data/Glu-HisMSDE.csv b/testsuite/data/src/Glu-HisMSDE.csv similarity index 100% rename from reference_data/Glu-HisMSDE.csv rename to testsuite/data/src/Glu-HisMSDE.csv diff --git a/reference_data/Lys-AspMSDE.csv b/testsuite/data/src/Lys-AspMSDE.csv similarity index 100% rename from reference_data/Lys-AspMSDE.csv rename to testsuite/data/src/Lys-AspMSDE.csv diff --git a/reference_data/data_landsgesell.csv b/testsuite/data/src/data_landsgesell.csv similarity index 100% rename from reference_data/data_landsgesell.csv rename to testsuite/data/src/data_landsgesell.csv diff --git a/reference_data/excess_chemical_potential.dat b/testsuite/data/src/excess_chemical_potential.dat similarity index 100% rename from reference_data/excess_chemical_potential.dat rename to testsuite/data/src/excess_chemical_potential.dat diff --git a/reference_data/histatin5_SoftMatter.txt b/testsuite/data/src/histatin5_SoftMatter.txt similarity index 100% rename from reference_data/histatin5_SoftMatter.txt rename to testsuite/data/src/histatin5_SoftMatter.txt diff --git a/testsuite/peptide_tests.py b/testsuite/peptide_tests.py new file mode 100644 index 0000000..b3f13d8 --- /dev/null +++ b/testsuite/peptide_tests.py @@ -0,0 +1,82 @@ +# Import pyMBE and other libraries +import pyMBE +from lib import analysis +import os +import tempfile +import subprocess +import numpy as np +import pandas as pd + +# Template of the test + +def run_peptide_test(script_path,test_pH_values,sequence,rtol,atol,mode="test"): + """ + Runs a set of tests for a given peptide sequence. + + Args: + script_path(`str`): Path to the script to run the test. + test_pH_values(`lst`): List of pH values to be tested. + sequence(`str`): Amino acid sequence of the peptide. + """ + valid_modes=["test","save"] + assert mode in valid_modes, f"Mode {mode} not supported, valid modes: {valid_modes}" + + print(f"Running tests for {sequence}") + with tempfile.TemporaryDirectory() as time_series_path: + for pH in test_pH_values: + print(f"pH = {pH}") + run_command=["python3", script_path, "--sequence", sequence, "--pH", str(pH), "--mode", "test", "--no_verbose", "--output", time_series_path] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + # Analyze all time series + data=analysis.analyze_time_series(path_to_datafolder=time_series_path) + data_path=pmb.get_resource(path="testsuite/peptide_tests_data") + if mode == "test": + # Get reference test data + ref_data=pd.read_csv(data_path+f"/{sequence}.csv", header=[0, 1]) + # Check charge + test_charge=np.sort(data["mean","charge"].to_numpy()) + ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) + np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) + # Check rg + test_rg=np.sort(data["mean","rg"].to_numpy()) + ref_rg=np.sort(ref_data["mean","rg"].to_numpy()) + np.testing.assert_allclose(test_rg, ref_rg, rtol=rtol, atol=atol) + print(f"Test for {sequence} was successful") + elif mode == "save": + # Save data for future testing + data.to_csv(f"{data_path}/{sequence}.csv", index=False) + else: + raise RuntimeError + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") +test_pH_values=[3,7,11] +rtol=0.1 # relative tolerance +atol=0.5 # absolute tolerance + +# Run test for K_5-D_5 case +sequence="K"*5+"D"*5 +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) + +# Run test for E_5-H_5 case +sequence="E"*5+"H"*5 +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) + +# Run test for histatin-5 case +sequence="nDSHAKRHHGYKRKFHEKHHSHRGYc" +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) diff --git a/testsuite/peptide_tests_data/EEEEEHHHHH.csv b/testsuite/peptide_tests_data/EEEEEHHHHH.csv new file mode 100644 index 0000000..2029cbc --- /dev/null +++ b/testsuite/peptide_tests_data/EEEEEHHHHH.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +value,value,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +3,EEEEEHHHHH,16.0,28.125,3.74,2.2930545511345084,0.09668283176667573,0.012248005496732568,63.9924303361178,386.504157031771,24.61228604323432,4.074988512749192 +7,EEEEEHHHHH,16.0,28.125,-3.462222222222222,2.282215370136204,0.08152363902529337,0.02392826711272775,107.85755044145347,125.10472480386765,14.602593824798644,12.589452576659077 +11,EEEEEHHHHH,16.0,28.125,-5.0,2.379941392407537,0.0,0.015862071975736733,,271.3603994805873,,5.804089333038255 diff --git a/testsuite/peptide_tests_data/KKKKKDDDDD.csv b/testsuite/peptide_tests_data/KKKKKDDDDD.csv new file mode 100644 index 0000000..8a89941 --- /dev/null +++ b/testsuite/peptide_tests_data/KKKKKDDDDD.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +value,value,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +3,KKKKKDDDDD,16.0,28.125,2.92,2.24224133500218,0.10683895616817528,0.016899080996828193,66.55820017221221,218.86820380470385,23.66350045462667,7.196111507542911 +7,KKKKKDDDDD,16.0,28.125,0.0022222222222222222,2.061724695770055,0.002305347229885367,0.010022432237558167,418.13333333333344,337.82731077353264,3.7667410715105456,4.662145273062604 +11,KKKKKDDDDD,16.0,28.125,-2.537777777777778,2.2240114798138597,0.09417150320822036,0.016801648569199405,91.37898210342641,223.8836649942004,17.235910969676027,7.034903596361424 diff --git a/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv b/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv new file mode 100644 index 0000000..743f138 --- /dev/null +++ b/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +value,value,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +7,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,5.044444444444444,4.279252793720242,0.05798349244388049,0.09771700380997232,155.74642000226976,71.28000135030244,10.112591994161557,22.095959177862643 +3,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,12.642222222222221,5.277371251802207,0.046374949253443896,0.11404609215228863,287.2696477457004,55.72484859428179,5.482653710177252,28.26387221796589 +11,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,-1.1377777777777778,3.925277367120465,0.046895855319717836,0.0915024823081239,297.1873784737742,63.15113924203325,5.299686709855564,24.940167650783412