From c2c4b21bb93b1e219d21d3d776fa49fa393f6728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Ko=C5=A1ovan?= Date: Fri, 13 Sep 2024 14:00:09 +0200 Subject: [PATCH 1/6] adopted peptide.py to work with the current version of block_analyze, removed some examples of bad practice (not all of them yet) --- samples/peptide.py | 128 ++++++++++++++++++++------------------------- 1 file changed, 56 insertions(+), 72 deletions(-) diff --git a/samples/peptide.py b/samples/peptide.py index 08e4e97..bb0fa29 100644 --- a/samples/peptide.py +++ b/samples/peptide.py @@ -20,6 +20,7 @@ import espressomd import numpy as np import matplotlib.pyplot as plt +import pandas as pd from tqdm import tqdm from espressomd.io.writer import vtf from pathlib import Path @@ -40,22 +41,29 @@ # Simulation parameters pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_range = np.linspace(2, 12, num=20) -Samples_per_pH = 1000 -MD_steps_per_sample = 1000 -steps_eq = int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction =0.5 +pH_value = 7 +N_samples = 100 # to make the demonstration quick, we set this to a very low value +MD_steps_per_sample = 100 # to make the demonstration quick, we set this to a very low value +N_samples_print = 10 # Write the full trajectory data every X samples LANGEVIN_SEED = 100 -dt = 0.001 +dt = 0.01 solvent_permitivity = 78.3 # Peptide parameters - -sequence = 'EEEEDDDD' +sequence = 'EEEEDDDD' # 8 ionizable side-chains model = '2beadAA' # Model with 2 beads per each aminoacid -pep_concentration = 5.56e-4 *pmb.units.mol/pmb.units.L -N_peptide_chains = 4 +pep_concentration = 5e-4 *pmb.units.mol/pmb.units.L # mols of peptide chains +N_peptide_chains = 1 + +# Solution parameters +cation_name = 'Na' +anion_name = 'Cl' +c_salt=5e-3 * pmb.units.mol/ pmb.units.L + +# Other system parameters, derived from those above +volume = N_peptide_chains/(pmb.N_A*pep_concentration) +L = volume ** (1./3.) # Side of the simulation box +calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) # Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. @@ -79,19 +87,9 @@ peptide_name = 'generic_peptide' pmb.define_peptide (name=peptide_name, sequence=sequence, model=model) -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt=5e-3 * pmb.units.mol/ pmb.units.L - pmb.define_particle(name=cation_name, z=1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) pmb.define_particle(name=anion_name, z=-1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -# System parameters -volume = N_peptide_chains/(pmb.N_A*pep_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) - # Create an instance of an espresso system espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) @@ -102,6 +100,8 @@ pmb.create_pmb_object(name=peptide_name, number_of_objects= N_peptide_chains,espresso_system=espresso_system, use_default_bond=True) pmb.create_counterions(object_name=peptide_name,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) # Create counterions for the peptide chains +# check what is the actual salt concentration in the box +# if the number of salt ions is a small integer, then the actual and desired salt concentration may significantly differ c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) with open('frames/trajectory0.vtf', mode='w+t') as coordinates: @@ -118,7 +118,7 @@ print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') print('The ionisable groups in your peptide are ', list_ionisible_groups) -cpH, labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=2) +cpH, labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=pH_value) print('The acid-base reaction has been sucessfully setup for ', labels) # Setup espresso to track the ionization of the acid/basic groups in peptide @@ -129,81 +129,65 @@ # Setup the non-interacting type for speeding up the sampling of the reactions non_interacting_type = max(type_map.values())+1 cpH.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) +print('The non-interacting type is set to ', non_interacting_type) -#Setup the potential energy +##Setup the potential energy +print('Setup LJ interaction (this can take a few seconds)') pmb.setup_lj_interactions (espresso_system=espresso_system) +print('Minimize energy before adding electrostatics') minimize_espresso_system_energy (espresso_system=espresso_system) +print('Setup and tune electrostatics (this can take a few seconds)') setup_electrostatic_interactions(units=pmb.units, espresso_system=espresso_system, kT=pmb.kT) +print('Minimize energy after adding electrostatics') minimize_espresso_system_energy (espresso_system=espresso_system) +print('Setup Langeving dynamics') setup_langevin_dynamics(espresso_system=espresso_system, kT = pmb.kT, SEED = LANGEVIN_SEED, time_step=dt, tune_skin=False) - +# for this example, we use a hard-coded skin value; In general it should be optimized by tuning espresso_system.cell_system.skin=0.4 -N_frame=0 -Z_pH=[] # List of the average global charge at each pH - #Save the pyMBE dataframe in a CSV file pmb.write_pmb_df(filename='df.csv') +# Initialize the time series with arbitrary values at time = 0 +time_series={} # for convenience, here we save the whole time series in a python dictionary +time_series["time"] = [0.0] +time_series["charge"] = [0.0] +# In production, one should save the time series to a file, then read and analyze it after the simulation # Main loop for performing simulations at different pH-values +N_frame=0 +for sample in tqdm(range(N_samples)): -for index in tqdm(range(len(pH_range))): + # LD sampling of the configuration space + espresso_system.integrator.run(steps=MD_steps_per_sample) + # cpH sampling of the reaction space + cpH.reaction( reaction_steps = total_ionisible_groups) # rule of thumb: one reaction step per titratable group (on average) - pH_value=pH_range[index] - # Sample list inicialization - Z_sim=[] - cpH.constant_pH = pH_value - - # Inner loop for sampling each pH value - - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - cpH.reaction( reaction_steps = total_ionisible_groups) - - if step > steps_eq: - # Get peptide net charge - charge_dict=pmb.calculate_net_charge ( espresso_system=espresso_system, - molecule_name=peptide_name, - dimensionless=True) - Z_sim.append(charge_dict["mean"]) - - if step % N_samples_print == 0: - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - Z_pH.append(Z_sim) - print(f"pH = {pH_value:6.4g} done") + # Get peptide net charge + charge_dict=pmb.calculate_net_charge ( espresso_system=espresso_system, + molecule_name=peptide_name, + dimensionless=True) + time_series["time"].append(espresso_system.time) + time_series["charge"].append(charge_dict["mean"]) + if sample % N_samples_print == 0: + N_frame+=1 + with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: + vtf.writevsf(espresso_system, coordinates) + vtf.writevcf(espresso_system, coordinates) # Estimate the statistical error and the autocorrelation time of the data - -print("Net charge analysis") -av_net_charge, err_net_charge, tau_net_charge, block_size_net_charge = block_analyze(full_data=Z_pH) +print("Statistical analysis of the time series:") +av_net_charge, err_net_charge, tau_net_charge, block_size_net_charge = block_analyze(full_data=pd.DataFrame(time_series, columns=["time","charge"])) # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation Z_HH = pmb.calculate_HH(molecule_name=peptide_name, - pH_list=pH_range) - -fig, ax = plt.subplots(figsize=(10, 7)) -plt.errorbar(pH_range, av_net_charge, yerr=err_net_charge, fmt = '-o', capsize=3, label='Simulation') -ax.plot(pH_range, Z_HH, "-k", label='Henderson-Hasselbach') -plt.legend() -plt.xlabel('pH') -plt.ylabel('Charge of the peptide / e') -plt.title(f'Peptide sequence: {sequence}') - -plt.show() + pH_list=[pH_value]) +print(f"average net charge = {av_net_charge}+/-{err_net_charge} (ideal value: {Z_HH})", ) From f2d3b21e502a2bada88452bad80aa3787038cc33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Ko=C5=A1ovan?= Date: Fri, 13 Sep 2024 14:07:11 +0200 Subject: [PATCH 2/6] added some explanatory comments to README.md --- README.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8b3d59b..dee74ac 100644 --- a/README.md +++ b/README.md @@ -63,11 +63,12 @@ sudo apt install python3-venv To set up pyMBE, the users need to install its virtual environment, install its Python dependencies and configure the path to the ESPResSo build folder as follows: ```sh -python3 -m venv pymbe -source pymbe/bin/activate -python3 maintainer/configure_venv.py --espresso_path=/home/user/espresso/build # adapt path +python3 -m venv pymbe # creates a local folder named pymbe, which contains the virtual environment +source pymbe/bin/activate # activates the pymbe venv +python3 maintainer/configure_venv.py --espresso_path=/home/user/espresso/build # please, adapt the espresso path accordingly python3 -m pip install -r requirements.txt -deactivate +python3 simulation_script.py # run the espresso simulation script +deactivate # deactivate the virtual environment ``` We highlight that the path `/home/user/espresso/build` is just an example of a possible From b18c73b76b1a315a9ad95db9e99989b678708bb8 Mon Sep 17 00:00:00 2001 From: blancoapa Date: Sun, 20 Oct 2024 14:25:09 +0200 Subject: [PATCH 3/6] remove block analysis, save time series, remove unnecesary imports --- samples/peptide.py | 136 ++++++++++++++++++++++++++++++--------------- 1 file changed, 90 insertions(+), 46 deletions(-) diff --git a/samples/peptide.py b/samples/peptide.py index bb0fa29..dd86e32 100644 --- a/samples/peptide.py +++ b/samples/peptide.py @@ -18,39 +18,50 @@ # Load espresso, pyMBE and other necessary libraries import espressomd -import numpy as np -import matplotlib.pyplot as plt import pandas as pd from tqdm import tqdm from espressomd.io.writer import vtf from pathlib import Path import pyMBE +import argparse # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) # Load some functions from the handy_scripts library for convinience -from lib.handy_functions import setup_electrostatic_interactions -from lib.handy_functions import minimize_espresso_system_energy -from lib.handy_functions import setup_langevin_dynamics -from lib.analysis import block_analyze +from lib.handy_functions import setup_electrostatic_interactions, minimize_espresso_system_energy,setup_langevin_dynamics +from lib.analysis import built_output_name + +parser = argparse.ArgumentParser(description='Sample script to run the pre-made peptide models with pyMBE') +parser.add_argument('--sequence', + type=str, + default= 'EEEEDDDD', # 8 ionizable side-chains + help='sequence of the peptide') +parser.add_argument('--pH', + type=float, + default=7, + help='pH of the solution') +parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) +args = parser.parse_args() # The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' Path("./frames").mkdir(parents=True, exist_ok=True) # Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_value = 7 +pmb.set_reduced_units(unit_length=0.4*pmb.units.nm, + verbose=False) +pH_value = args.pH N_samples = 100 # to make the demonstration quick, we set this to a very low value MD_steps_per_sample = 100 # to make the demonstration quick, we set this to a very low value N_samples_print = 10 # Write the full trajectory data every X samples LANGEVIN_SEED = 100 dt = 0.01 solvent_permitivity = 78.3 +verbose=args.no_verbose # Peptide parameters -sequence = 'EEEEDDDD' # 8 ionizable side-chains +sequence = args.sequence model = '2beadAA' # Model with 2 beads per each aminoacid pep_concentration = 5e-4 *pmb.units.mol/pmb.units.L # mols of peptide chains N_peptide_chains = 1 @@ -85,10 +96,18 @@ # Defines the peptide in the pyMBE data frame peptide_name = 'generic_peptide' -pmb.define_peptide (name=peptide_name, sequence=sequence, model=model) - -pmb.define_particle(name=cation_name, z=1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle(name=anion_name, z=-1, sigma=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) +pmb.define_peptide (name=peptide_name, + sequence=sequence, + model=model) + +pmb.define_particle(name=cation_name, + z=1, + sigma=0.35*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) +pmb.define_particle(name=anion_name, + z=-1, + sigma=0.35*pmb.units.nm, + epsilon=1*pmb.units('reduced_energy')) # Create an instance of an espresso system espresso_system=espressomd.System (box_l = [L.to('reduced_length').magnitude]*3) @@ -97,12 +116,24 @@ pmb.add_bonds_to_espresso(espresso_system=espresso_system) # Create your molecules into the espresso system -pmb.create_pmb_object(name=peptide_name, number_of_objects= N_peptide_chains,espresso_system=espresso_system, use_default_bond=True) -pmb.create_counterions(object_name=peptide_name,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) # Create counterions for the peptide chains +pmb.create_pmb_object(name=peptide_name, + number_of_objects= N_peptide_chains, + espresso_system=espresso_system, + use_default_bond=True) +# Create counterions for the peptide chains +pmb.create_counterions(object_name=peptide_name, + cation_name=cation_name, + anion_name=anion_name, + espresso_system=espresso_system, + verbose=verbose) # check what is the actual salt concentration in the box # if the number of salt ions is a small integer, then the actual and desired salt concentration may significantly differ -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) +c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, + cation_name=cation_name, + anion_name=anion_name, + c_salt=c_salt, + verbose=verbose) with open('frames/trajectory0.vtf', mode='w+t') as coordinates: vtf.writevsf(espresso_system, coordinates) @@ -114,12 +145,14 @@ list_ionisible_groups = basic_groups + acidic_groups total_ionisible_groups = len (list_ionisible_groups) -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) +if verbose: + print("The box length of your system is", L.to('reduced_length'), L.to('nm')) + print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') + print('The ionisable groups in your peptide are ', list_ionisible_groups) cpH, labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=pH_value) -print('The acid-base reaction has been sucessfully setup for ', labels) +if verbose: + print('The acid-base reaction has been sucessfully setup for ', labels) # Setup espresso to track the ionization of the acid/basic groups in peptide type_map =pmb.get_type_map() @@ -129,27 +162,36 @@ # Setup the non-interacting type for speeding up the sampling of the reactions non_interacting_type = max(type_map.values())+1 cpH.set_non_interacting_type (type=non_interacting_type) -print('The non-interacting type is set to ', non_interacting_type) +if verbose: + print('The non-interacting type is set to ', non_interacting_type) ##Setup the potential energy -print('Setup LJ interaction (this can take a few seconds)') -pmb.setup_lj_interactions (espresso_system=espresso_system) -print('Minimize energy before adding electrostatics') -minimize_espresso_system_energy (espresso_system=espresso_system) -print('Setup and tune electrostatics (this can take a few seconds)') +if verbose: + print('Setup LJ interaction (this can take a few seconds)') +pmb.setup_lj_interactions (espresso_system=espresso_system, + warnings=verbose) +if verbose: + print('Minimize energy before adding electrostatics') +minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) +if verbose: + print('Setup and tune electrostatics (this can take a few seconds)') setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -print('Minimize energy after adding electrostatics') -minimize_espresso_system_energy (espresso_system=espresso_system) - - -print('Setup Langeving dynamics') + espresso_system=espresso_system, + kT=pmb.kT, + verbose=verbose) +if verbose: + print('Minimize energy after adding electrostatics') +minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) + +if verbose: + print('Setup Langeving dynamics') setup_langevin_dynamics(espresso_system=espresso_system, - kT = pmb.kT, - SEED = LANGEVIN_SEED, - time_step=dt, - tune_skin=False) + kT = pmb.kT, + SEED = LANGEVIN_SEED, + time_step=dt, + tune_skin=False) # for this example, we use a hard-coded skin value; In general it should be optimized by tuning espresso_system.cell_system.skin=0.4 @@ -172,7 +214,7 @@ cpH.reaction( reaction_steps = total_ionisible_groups) # rule of thumb: one reaction step per titratable group (on average) # Get peptide net charge - charge_dict=pmb.calculate_net_charge ( espresso_system=espresso_system, + charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, molecule_name=peptide_name, dimensionless=True) time_series["time"].append(espresso_system.time) @@ -183,11 +225,13 @@ vtf.writevsf(espresso_system, coordinates) vtf.writevcf(espresso_system, coordinates) -# Estimate the statistical error and the autocorrelation time of the data -print("Statistical analysis of the time series:") -av_net_charge, err_net_charge, tau_net_charge, block_size_net_charge = block_analyze(full_data=pd.DataFrame(time_series, columns=["time","charge"])) - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation -Z_HH = pmb.calculate_HH(molecule_name=peptide_name, - pH_list=[pH_value]) -print(f"average net charge = {av_net_charge}+/-{err_net_charge} (ideal value: {Z_HH})", ) +# Store time series +data_path=pmb.get_resource(path="samples")+"/time_series/peptide" +Path(data_path).mkdir(parents=True, + exist_ok=True) +time_series=pd.DataFrame(time_series) +filename=built_output_name(input_dict={"sequence":sequence,"pH":pH_value}) + +time_series.to_csv(f"{data_path}/{filename}_time_series.csv", index=False) + + From d8e6a3430b1056a740d4aadc850037aeeee7f9a2 Mon Sep 17 00:00:00 2001 From: blancoapa Date: Sun, 20 Oct 2024 18:46:24 +0200 Subject: [PATCH 4/6] add test script for peptide sample, add new functionality for analysis --- lib/analysis.py | 14 +++- samples/analysis.py | 36 ++++++++ samples/peptide.py | 9 +- testsuite/CTestTestfile.cmake | 1 + testsuite/analysis_tests.py | 11 +++ .../peptide_tests_data/peptide_sample.csv | 4 + testsuite/samples_tests.py | 82 +++++++++++++++++++ 7 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 samples/analysis.py create mode 100644 testsuite/peptide_tests_data/peptide_sample.csv create mode 100644 testsuite/samples_tests.py diff --git a/lib/analysis.py b/lib/analysis.py index 65dbd0b..e336bc7 100644 --- a/lib/analysis.py +++ b/lib/analysis.py @@ -42,7 +42,7 @@ def add_data_to_df(df, data_dict, index): index=index)]) return updated_df -def analyze_time_series(path_to_datafolder, filename_extension= ".csv", minus_separator = False,): +def analyze_time_series(path_to_datafolder, filename_extension= ".csv", minus_separator = False, ignore_files=None): """ Analyzes all time series stored in `path_to_datafolder` using the block binning method. @@ -50,6 +50,7 @@ def analyze_time_series(path_to_datafolder, filename_extension= ".csv", minus_se path_to_datafolder(`str`): path to the folder with the files with the time series filename_extension(`str`): extension of the file. Defaults to ".csv" minus_separator(`bool`): switch to enable the minus as a separator. Defaults to False. + ignore_files(`lst`): list of filenames to be ignored for the bining analysis. Returns: data(`Pandas.Dataframe`): Dataframe with the time averages of all the time series in the datafolder. @@ -59,10 +60,21 @@ def analyze_time_series(path_to_datafolder, filename_extension= ".csv", minus_se """ data=pd.DataFrame() + if ignore_files is None: + ignore_files=[] with os.scandir(path_to_datafolder) as subdirectory: # Gather all data for subitem in subdirectory: if subitem.is_file(): + ignore_file=False + for file in ignore_files: + if set(file.split()) == set(subitem.name.split()): + ignore_file=True + if ignore_file: + continue + print(subitem.name) + from time import sleep + sleep(5) if filename_extension in subitem.name: # Get parameters from the file name data_dict=get_params_from_file_name(file_name=subitem.name, diff --git a/samples/analysis.py b/samples/analysis.py new file mode 100644 index 0000000..3881957 --- /dev/null +++ b/samples/analysis.py @@ -0,0 +1,36 @@ +# +# Copyright (C) 2024 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import pandas as pd +from pathlib import Path +import argparse +from lib import analysis + +parser = argparse.ArgumentParser(description='Sample script analyze time series from the other sample scripts using the binning method.') +parser.add_argument('--data_folder', + type=str, + required=True, + help='path to the data folder with the time series') +parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) +args = parser.parse_args() + +# Read and analyze time series +analyzed_data=analysis.analyze_time_series(path_to_datafolder=args.data_folder, + ignore_files=["analyzed_data.csv"]) +analyzed_data.to_csv(f"{args.data_folder}/analyzed_data.csv", + index=False) diff --git a/samples/peptide.py b/samples/peptide.py index dd86e32..a8d9d1e 100644 --- a/samples/peptide.py +++ b/samples/peptide.py @@ -41,6 +41,11 @@ type=float, default=7, help='pH of the solution') +parser.add_argument('--output', + type=str, + required= False, + default="samples/time_series/peptide", + help='output directory') parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) args = parser.parse_args() @@ -226,7 +231,9 @@ vtf.writevcf(espresso_system, coordinates) # Store time series -data_path=pmb.get_resource(path="samples")+"/time_series/peptide" + +data_path=pmb.get_resource(path=args.output) + Path(data_path).mkdir(parents=True, exist_ok=True) time_series=pd.DataFrame(time_series) diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake index 274322a..8012957 100644 --- a/testsuite/CTestTestfile.cmake +++ b/testsuite/CTestTestfile.cmake @@ -42,6 +42,7 @@ endfunction() pymbe_add_test(PATH globular_protein_tests.py LABELS long beyer2024 THREADS 2) pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2) pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024) +pymbe_add_test(PATH samples_tests.py LABELS long THREADS 2) pymbe_add_test(PATH cph_ideal_tests.py LABELS long) pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long) pymbe_add_test(PATH gcmc_tests.py LABELS long) diff --git a/testsuite/analysis_tests.py b/testsuite/analysis_tests.py index 9acddb9..024eec8 100644 --- a/testsuite/analysis_tests.py +++ b/testsuite/analysis_tests.py @@ -36,6 +36,17 @@ def test_analyze_time_series(self): reference_data.columns = reference_data.sort_index(axis=1,level=[0,1],ascending=[True,True]).columns pd.testing.assert_frame_equal(analyzed_data.dropna(),reference_data.dropna(), check_column_type=False, check_dtype=False) print("*** Unit passed ***") + print("*** Unit test: test that analysis.analyze_time_series ignores files that should not be analyzed ***") + analyzed_data = ana.analyze_time_series(path_to_datafolder=self.data_root, + ignore_files=["average_data.csv","N-064_Solvent-good_Init-coil_time_series_analyzed.csv"], + minus_separator=True, + filename_extension="_time_series.csv") + analyzed_data[["Dens","eps"]] = analyzed_data[["Dens","eps"]].apply(pd.to_numeric) + reference_data = pd.read_csv(self.data_root / "average_data.csv", header=[0,1]) + analyzed_data.columns = analyzed_data.sort_index(axis=1,level=[0,1],ascending=[True,True]).columns + reference_data.columns = reference_data.sort_index(axis=1,level=[0,1],ascending=[True,True]).columns + pd.testing.assert_frame_equal(analyzed_data.dropna(),reference_data.dropna(), check_column_type=False, check_dtype=False) + print("*** Unit passed ***") def test_get_dt(self): print("*** Unit test: test that analysis.get_dt returns the right time step ***") diff --git a/testsuite/peptide_tests_data/peptide_sample.csv b/testsuite/peptide_tests_data/peptide_sample.csv new file mode 100644 index 0000000..a7ead6b --- /dev/null +++ b/testsuite/peptide_tests_data/peptide_sample.csv @@ -0,0 +1,4 @@ +sequence,pH,time,mean,err_mean,n_eff,tau_int +value,value,value,charge,charge,charge,charge +EEEEDDDD,7,series,-7.648351648351649,0.1255297416924878,23.09080597201252,1.9704812406795775 +EEEEDDDD,3,series,-0.6263736263736264,0.1103921181655937,28.535136224326788,1.594525417447734 diff --git a/testsuite/samples_tests.py b/testsuite/samples_tests.py new file mode 100644 index 0000000..5fa69b7 --- /dev/null +++ b/testsuite/samples_tests.py @@ -0,0 +1,82 @@ +# +# Copyright (C) 2024 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# Tests that samples/peptide.py and analysis.py work properly + +import sys +import pathlib +import tempfile +import subprocess +import multiprocessing +from lib import analysis +import numpy as np +import pandas as pd +import unittest as ut + +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "samples" / "time_series" / "peptide" +sample_path = root / "samples" / "peptide.py" +analysis_path = root / "samples" / "analysis.py" +reference_path = root / "testsuite" / "peptide_tests_data" +sequences=["EEEEDDDD"] +test_pH_values=[3,7] +mode = "save" + +def kernel(sequence): + """ + Runs a set of tests for a given peptide sequence. + + Args: + sequence(`str`): Amino acid sequence of the peptide. + """ + with tempfile.TemporaryDirectory() as time_series_path: + for pH in test_pH_values: + print(f"pH = {pH}") + run_command=[sys.executable, sample_path, "--sequence", sequence, + "--pH", str(pH), "--output", time_series_path] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + run_command=[sys.executable, analysis_path, "--data_folder", time_series_path] + subprocess.check_output(run_command) + analyzed_data=pd.read_csv(time_series_path + "/analyzed_data.csv", header=[0, 1]) + # Save data for future testing + if mode == "save": + analyzed_data.to_csv(reference_path / f"peptide_sample.csv", index=False) + else: + assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']" + + + return analyzed_data + + +class Test(ut.TestCase): + + def test_peptide(self): + with multiprocessing.Pool(processes=2) as pool: + analyzed_data=pool.map(kernel, sequences, chunksize=2)[0] + rtol=0.1 # relative tolerance + atol=0.5 # absolute tolerance + reference_data=pd.read_csv(reference_path / f"peptide_sample.csv", header=[0, 1]) + pd.testing.assert_series_equal (analyzed_data[("mean","charge")], + reference_data[("mean","charge")], + rtol=rtol, + atol=atol) + print(analyzed_data[("mean","charge")]) +if __name__ == "__main__": + ut.main() \ No newline at end of file From d277fe0775ecb4c4913562cacdc7bff84d6fb873 Mon Sep 17 00:00:00 2001 From: blancoapa Date: Sun, 20 Oct 2024 18:58:21 +0200 Subject: [PATCH 5/6] improve code quality --- samples/analysis.py | 2 -- testsuite/samples_tests.py | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/samples/analysis.py b/samples/analysis.py index 3881957..9299009 100644 --- a/samples/analysis.py +++ b/samples/analysis.py @@ -16,8 +16,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import pandas as pd -from pathlib import Path import argparse from lib import analysis diff --git a/testsuite/samples_tests.py b/testsuite/samples_tests.py index 5fa69b7..e6e758c 100644 --- a/testsuite/samples_tests.py +++ b/testsuite/samples_tests.py @@ -24,8 +24,6 @@ import tempfile import subprocess import multiprocessing -from lib import analysis -import numpy as np import pandas as pd import unittest as ut @@ -57,7 +55,7 @@ def kernel(sequence): analyzed_data=pd.read_csv(time_series_path + "/analyzed_data.csv", header=[0, 1]) # Save data for future testing if mode == "save": - analyzed_data.to_csv(reference_path / f"peptide_sample.csv", index=False) + analyzed_data.to_csv(reference_path / "peptide_sample.csv", index=False) else: assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']" @@ -72,7 +70,7 @@ def test_peptide(self): analyzed_data=pool.map(kernel, sequences, chunksize=2)[0] rtol=0.1 # relative tolerance atol=0.5 # absolute tolerance - reference_data=pd.read_csv(reference_path / f"peptide_sample.csv", header=[0, 1]) + reference_data=pd.read_csv(reference_path / "peptide_sample.csv", header=[0, 1]) pd.testing.assert_series_equal (analyzed_data[("mean","charge")], reference_data[("mean","charge")], rtol=rtol, From abaf035f75060a200ccfefa194de904bfb70d200 Mon Sep 17 00:00:00 2001 From: blancoapa Date: Mon, 21 Oct 2024 17:39:14 +0200 Subject: [PATCH 6/6] refactor peptide_mixture_grxmc_ideal.py to the new logic, adapt tests, add new script for plotting grxmc data --- .../{analysis.py => analyze_time_series.py} | 1 - samples/peptide.py | 51 ++-- samples/peptide_mixture_grxmc_ideal.py | 258 +++++++----------- samples/plot_HH.py | 69 ----- samples/plot_peptide.py | 104 +++++++ testsuite/CTestTestfile.cmake | 8 +- testsuite/grxmc_ideal_tests.py | 98 +++++-- .../peptide_tests_data/peptide_sample.csv | 4 - testsuite/samples_tests.py | 80 ------ testsuite/test_peptide_sample.py | 84 ++++++ 10 files changed, 392 insertions(+), 365 deletions(-) rename samples/{analysis.py => analyze_time_series.py} (92%) delete mode 100644 samples/plot_HH.py create mode 100644 samples/plot_peptide.py delete mode 100644 testsuite/peptide_tests_data/peptide_sample.csv delete mode 100644 testsuite/samples_tests.py create mode 100644 testsuite/test_peptide_sample.py diff --git a/samples/analysis.py b/samples/analyze_time_series.py similarity index 92% rename from samples/analysis.py rename to samples/analyze_time_series.py index 9299009..373b17c 100644 --- a/samples/analysis.py +++ b/samples/analyze_time_series.py @@ -24,7 +24,6 @@ type=str, required=True, help='path to the data folder with the time series') -parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) args = parser.parse_args() # Read and analyze time series diff --git a/samples/peptide.py b/samples/peptide.py index a8d9d1e..f4ffaa3 100644 --- a/samples/peptide.py +++ b/samples/peptide.py @@ -46,6 +46,10 @@ required= False, default="samples/time_series/peptide", help='output directory') +parser.add_argument('--test', + default=False, + action='store_true', + help='to run a short simulation for testing the script') parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) args = parser.parse_args() @@ -57,13 +61,18 @@ pmb.set_reduced_units(unit_length=0.4*pmb.units.nm, verbose=False) pH_value = args.pH -N_samples = 100 # to make the demonstration quick, we set this to a very low value +N_samples = 1000 # to make the demonstration quick, we set this to a very low value MD_steps_per_sample = 100 # to make the demonstration quick, we set this to a very low value N_samples_print = 10 # Write the full trajectory data every X samples LANGEVIN_SEED = 100 dt = 0.01 solvent_permitivity = 78.3 verbose=args.no_verbose +ideal=False + +if args.test: + MD_steps_per_sample = 1 + ideal=True # Peptide parameters sequence = args.sequence @@ -169,26 +178,26 @@ cpH.set_non_interacting_type (type=non_interacting_type) if verbose: print('The non-interacting type is set to ', non_interacting_type) - -##Setup the potential energy -if verbose: - print('Setup LJ interaction (this can take a few seconds)') -pmb.setup_lj_interactions (espresso_system=espresso_system, - warnings=verbose) -if verbose: - print('Minimize energy before adding electrostatics') -minimize_espresso_system_energy (espresso_system=espresso_system, - verbose=verbose) -if verbose: - print('Setup and tune electrostatics (this can take a few seconds)') -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT, - verbose=verbose) -if verbose: - print('Minimize energy after adding electrostatics') -minimize_espresso_system_energy (espresso_system=espresso_system, - verbose=verbose) +if not ideal: + ##Setup the potential energy + if verbose: + print('Setup LJ interaction (this can take a few seconds)') + pmb.setup_lj_interactions (espresso_system=espresso_system, + warnings=verbose) + if verbose: + print('Minimize energy before adding electrostatics') + minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) + if verbose: + print('Setup and tune electrostatics (this can take a few seconds)') + setup_electrostatic_interactions(units=pmb.units, + espresso_system=espresso_system, + kT=pmb.kT, + verbose=verbose) + if verbose: + print('Minimize energy after adding electrostatics') + minimize_espresso_system_energy (espresso_system=espresso_system, + verbose=verbose) if verbose: print('Setup Langeving dynamics') diff --git a/samples/peptide_mixture_grxmc_ideal.py b/samples/peptide_mixture_grxmc_ideal.py index 84f20e3..a00a762 100644 --- a/samples/peptide_mixture_grxmc_ideal.py +++ b/samples/peptide_mixture_grxmc_ideal.py @@ -19,12 +19,11 @@ #Load espresso, pyMBE and other necessary libraries import espressomd from pathlib import Path -import numpy as np -import matplotlib.pyplot as plt import pandas as pd import argparse from espressomd.io.writer import vtf import pyMBE +from lib.analysis import built_output_name # Create an instance of pyMBE library pmb = pyMBE.pymbe_library(seed=42) @@ -41,7 +40,24 @@ default=False, action='store_true', help='to run a short simulation for testing the script') - +parser.add_argument('--sequence1', + type=str, + default= 'nEHc', + help='sequence of the first peptide') +parser.add_argument('--sequence2', + type=str, + default= 'nEEHHc', + help='sequence of the second peptide') +parser.add_argument('--pH', + type=float, + default=7, + help='pH of the solution') +parser.add_argument('--output', + type=str, + required= False, + default="samples/time_series/peptide_mixture_grxmc_ideal", + help='output directory') +parser.add_argument('--no_verbose', action='store_false', help="Switch to deactivate verbose",default=True) args = parser.parse_args() if args.mode not in valid_modes: @@ -51,43 +67,33 @@ Path("./frames").mkdir(parents=True, exist_ok=True) -#Import functions from handy_functions script -from lib.analysis import block_analyze - # Simulation parameters +verbose=args.no_verbose pmb.set_reduced_units(unit_length=0.4*pmb.units.nm, - Kw=1e-14) -pH_range = np.linspace(2, 12, num=20) -Samples_per_pH = 500 -MD_steps_per_sample = 0 -steps_eq = int(Samples_per_pH) + Kw=1e-14, + verbose=verbose) +N_samples = 1000 # to make the demonstration quick, we set this to a very low value +MD_steps_per_sample = 1000 N_samples_print = 1000 # Write the trajectory every 100 samples -probability_reaction =1 LANGEVIN_SEED = 42 dt = 0.001 solvent_permitivity = 78.3 - +pH_value= args.pH # Peptide parameters -sequence1 = 'nEHc' +sequence1 = args.sequence1 model = '1beadAA' # Model with 2 beads per each aminoacid pep1_concentration = 1e-2 *pmb.units.mol/pmb.units.L N_peptide1_chains = 10 -sequence2 = 'nEEHHc' +sequence2 = args.sequence2 pep2_concentration = 1e-2 *pmb.units.mol/pmb.units.L N_peptide2_chains = 10 -verbose=True if args.test: - Samples_per_pH = 1000 - probability_reaction = 1 - N_samples_print = 1000 + MD_steps_per_sample = 1 N_peptide1_chains = 5 N_peptide2_chains = 5 - pH_range = np.linspace(2, 12, num=10) - verbose = False - - + # Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. # Note that this parameterization only includes some of the natural aminoacids # For the other aminoacids the user needs to use a parametrization including all the aminoacids in the peptide sequence @@ -97,17 +103,13 @@ pmb.load_interaction_parameters(filename=path_to_interactions) pmb.load_pka_set(path_to_pka) - # Defines the bonds - bond_type = 'harmonic' generic_bond_length=0.4 * pmb.units.nm generic_harmonic_constant = 400 * pmb.units('reduced_energy / reduced_length**2') harmonic_bond = {'r_0' : generic_bond_length, - 'k' : generic_harmonic_constant, - } - + 'k' : generic_harmonic_constant} pmb.define_default_bond(bond_type = bond_type, bond_parameters = harmonic_bond) @@ -187,27 +189,36 @@ pmb.create_counterions(object_name=peptide1, cation_name=proton_name, anion_name=hydroxide_name, - espresso_system=espresso_system) # Create counterions for the peptide chains + espresso_system=espresso_system, + verbose=verbose) # Create counterions for the peptide chains with sequence 1 pmb.create_counterions(object_name=peptide2, cation_name=proton_name, anion_name=hydroxide_name, - espresso_system=espresso_system) # Create counterions for the peptide chains + espresso_system=espresso_system, + verbose=verbose) # Create counterions for the peptide chains with sequence 2 - c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=sodium_name,anion_name=chloride_name,c_salt=c_salt) + c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, + cation_name=sodium_name, + anion_name=chloride_name, + c_salt=c_salt, + verbose=verbose) elif args.mode == 'unified': pmb.create_counterions(object_name=peptide1, cation_name=cation_name, anion_name=anion_name, - espresso_system=espresso_system) # Create counterions for the peptide chains + espresso_system=espresso_system, + verbose=verbose) # Create counterions for the peptide chains with sequence 1 pmb.create_counterions(object_name=peptide2, cation_name=cation_name, anion_name=anion_name, - espresso_system=espresso_system) # Create counterions for the peptide chains + espresso_system=espresso_system, + verbose=verbose) # Create counterions for the peptide chains with sequence 2 c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system, cation_name=cation_name, anion_name=anion_name, - c_salt=c_salt) + c_salt=c_salt, + verbose=verbose) with open('frames/trajectory0.vtf', mode='w+t') as coordinates: @@ -219,11 +230,12 @@ acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() list_ionisible_groups = basic_groups + acidic_groups total_ionisible_groups = len (list_ionisible_groups) - -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) +# Get peptide net charge +if verbose: + print("The box length of your system is", L.to('reduced_length'), L.to('nm')) if args.mode == 'standard': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=2, + grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_value, c_salt_res=c_salt, proton_name=proton_name, hydroxide_name=hydroxide_name, @@ -231,12 +243,13 @@ salt_anion_name=chloride_name, activity_coefficient=lambda x: 1.0) elif args.mode == 'unified': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_unified(pH_res=2, + grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_unified(pH_res=pH_value, c_salt_res=c_salt, cation_name=cation_name, anion_name=anion_name, activity_coefficient=lambda x: 1.0) -print('The acid-base reaction has been sucessfully setup for ', sucessful_reactions_labels) +if verbose: + print('The acid-base reaction has been sucessfully setup for ', sucessful_reactions_labels) # Setup espresso to track the ionization of the acid/basic groups in peptide type_map =pmb.get_type_map() @@ -246,7 +259,8 @@ # Setup the non-interacting type for speeding up the sampling of the reactions non_interacting_type = max(type_map.values())+1 grxmc.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) +if verbose: + print('The non interacting type is set to ', non_interacting_type) espresso_system.time_step = dt @@ -259,131 +273,53 @@ espresso_system.time_step= dt espresso_system.integrator.set_vv() espresso_system.thermostat.set_langevin(kT=pmb.kT.to('reduced_energy').magnitude, gamma=0.1, seed=LANGEVIN_SEED) - -N_frame=0 -Z_pH=[] # List of the average global charge at each pH -err_Z_pH=[] # List of the error of the global charge at each pH -xi_plus=[] # List of the average partition coefficient of positive ions -err_xi_plus=[] # List of the error of the partition coefficient of positive ions - -particle_id_list = pmb.get_particle_id_map(peptide1)["all"]+pmb.get_particle_id_map(peptide2)["all"] +espresso_system.cell_system.skin=0.4 #Save the pyMBE dataframe in a CSV file pmb.write_pmb_df (filename='df.csv') +time_series={} +for label in ["time","charge_peptide1","charge_peptide2","num_plus","xi_plus"]: + time_series[label]=[] -# Main loop for performing simulations at different pH-values -labels_obs=["time","charge","num_plus"] - -for pH_value in pH_range: - - time_series={} - - for label in labels_obs: - time_series[label]=[] - +# Main simulation loop +N_frame=0 +for step in range(N_samples): + espresso_system.integrator.run(steps=MD_steps_per_sample) + grxmc.reaction(reaction_steps = total_ionisible_groups) + time_series["time"].append(espresso_system.time) + # Get net charge of peptide1 and peptide2 + charge_dict_peptide1=pmb.calculate_net_charge(espresso_system=espresso_system, + molecule_name=peptide1, + dimensionless=True) + charge_dict_peptide2=pmb.calculate_net_charge(espresso_system=espresso_system, + molecule_name=peptide2, + dimensionless=True) + time_series["charge_peptide1"].append(charge_dict_peptide1["mean"]) + time_series["charge_peptide2"].append(charge_dict_peptide2["mean"]) if args.mode == 'standard': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_reactions(pH_res=pH_value, - c_salt_res=c_salt, - proton_name=proton_name, - hydroxide_name=hydroxide_name, - salt_cation_name=sodium_name, - salt_anion_name=chloride_name, - activity_coefficient=lambda x: 1.0) + num_plus = espresso_system.number_of_particles(type=type_map["Na"])+espresso_system.number_of_particles(type=type_map["Hplus"]) elif args.mode == 'unified': - grxmc, sucessful_reactions_labels, ionic_strength_res = pmb.setup_grxmc_unified(pH_res=pH_value, - c_salt_res=c_salt, - cation_name=cation_name, - anion_name=anion_name, - activity_coefficient=lambda x: 1.0) - - # Inner loop for sampling each pH value - - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - grxmc.reaction(reaction_steps = total_ionisible_groups) - - if step > steps_eq: - # Get peptide net charge - z_one_object=0 - for pid in particle_id_list: - z_one_object +=espresso_system.part.by_id(pid).q - - if args.test: - time_series["time"].append(step) - else: - time_series["time"].append(espresso_system.time) - time_series["charge"].append(np.mean((z_one_object))) - - if args.mode == 'standard': - time_series["num_plus"].append(espresso_system.number_of_particles(type=type_map["Na"])+espresso_system.number_of_particles(type=type_map["Hplus"])) - elif args.mode == 'unified': - time_series["num_plus"].append(espresso_system.number_of_particles(type=type_map["Na"])) - - if step % N_samples_print == 0: - N_frame+=1 - with open(f'frames/trajectory{N_frame}.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - # Estimate the statistical error and the autocorrelation time of the data - processed_data = block_analyze(full_data=pd.DataFrame(time_series, columns=labels_obs), - verbose=verbose) - - Z_pH.append(processed_data["mean", "charge"]) - err_Z_pH.append(processed_data["err_mean", "charge"]) - concentration_plus = (processed_data["mean", "num_plus"]/(pmb.N_A * L**3)).to('mol/L') - err_concentration_plus = (processed_data["err_mean", "num_plus"]/(pmb.N_A * L**3)).to('mol/L') - xi_plus.append((concentration_plus/ionic_strength_res).magnitude) - err_xi_plus.append(err_concentration_plus/ionic_strength_res) - print(f"pH = {pH_value:6.4g} done") - + num_plus = espresso_system.number_of_particles(type=type_map["Na"]) + time_series["num_plus"].append(num_plus) + concentration_plus = (num_plus/(pmb.N_A * L**3)).to("mol/L") + xi_plus = (concentration_plus/ionic_strength_res).magnitude + time_series["xi_plus"].append(xi_plus) + if step % N_samples_print == 0: + N_frame+=1 + with open(f'frames/trajectory{N_frame}.vtf', mode='w+t') as coordinates: + vtf.writevsf(espresso_system, coordinates) + vtf.writevcf(espresso_system, coordinates) + +# Store time series +data_path=pmb.get_resource(path=args.output) +Path(data_path).mkdir(parents=True, + exist_ok=True) +time_series=pd.DataFrame(time_series) -if args.test: - # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation - HH_charge_dict = pmb.calculate_HH_Donnan(c_macro={peptide1: pep1_concentration, peptide2: pep2_concentration}, - c_salt=c_salt, - pH_list=pH_range) - Z_HH_Donnan = HH_charge_dict["charges_dict"] - xi = HH_charge_dict["partition_coefficients"] - - # Write out the data - data = {} - data["Z_sim"] = np.asarray(Z_pH)/N_peptide1_chains - data["xi_sim"] = np.asarray(xi_plus) - data["Z_HH_Donnan"] = np.asarray(Z_HH_Donnan[peptide1])+np.asarray(Z_HH_Donnan[peptide2]) - data["xi_HH_Donnan"] = np.asarray(xi) - data = pd.DataFrame.from_dict(data) - - data_path = pmb.get_resource(path="samples") - data.to_csv(f"{data_path}/data_peptide_grxmc.csv", index=False) - -else: - # Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation - pH_range_HH = np.linspace(2, 12, num=100) - HH_charge_dict = pmb.calculate_HH_Donnan(c_macro={peptide1: pep1_concentration, peptide2: pep2_concentration}, - c_salt=c_salt, - pH_list=pH_range_HH) - Z_HH_Donnan = HH_charge_dict["charges_dict"] - xi = HH_charge_dict["partition_coefficients"] - - # Plot the results - fig, ax = plt.subplots(figsize=(10, 7)) - plt.errorbar(pH_range, np.asarray(Z_pH)/N_peptide1_chains, yerr=np.asarray(err_Z_pH)/N_peptide1_chains, fmt = 'o', capsize=3, label='Simulation') - ax.plot(pH_range_HH, np.asarray(Z_HH_Donnan[peptide1])+np.asarray(Z_HH_Donnan[peptide2]), "--r", label='HH+Donnan') - plt.legend() - plt.xlabel('pH') - plt.ylabel('Charge of the peptide 1 + peptide 2 / e') - plt.show() - plt.close() - - fig, ax = plt.subplots(figsize=(10, 7)) - plt.errorbar(pH_range, xi_plus, yerr=err_xi_plus, fmt = 'o', capsize=3, label='Simulation') - ax.plot(pH_range_HH, np.asarray(xi), "-k", label='HH+Donnan') - plt.legend() - plt.xlabel('pH') - plt.ylabel(r'partition coefficient $\xi_+$') - plt.show() - plt.close() +filename=built_output_name(input_dict={"mode":args.mode, + "sequence1":sequence1, + "sequence2": sequence2, + "pH":pH_value}) + +time_series.to_csv(f"{data_path}/{filename}_time_series.csv", + index=False) \ No newline at end of file diff --git a/samples/plot_HH.py b/samples/plot_HH.py deleted file mode 100644 index 3382dfe..0000000 --- a/samples/plot_HH.py +++ /dev/null @@ -1,69 +0,0 @@ -# -# Copyright (C) 2024 pyMBE-dev team -# -# This file is part of pyMBE. -# -# pyMBE is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# pyMBE is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -# Plots the charge of a peptide with a given sequence using either a custom or reference set of pKa values - -import numpy as np -import matplotlib.pyplot as plt - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library(seed=42) - -# Input parameters - -sequence="nDEHKc" -pH_values = np.linspace(2, 12, num=21) -load_pka_set_from_file=True # If set to false, uses custom_pka_set -path_to_pka=pmb.get_resource("parameters/pka_sets/Nozaki1967.json") - -custom_pka_set={"D" : {"pka_value": 4.0, "acidity": "acidic"}, - "E" : {"pka_value": 4.4, "acidity": "acidic"}, - "H" : {"pka_value": 6.8, "acidity": "basic"}, - "K" : {"pka_value": 10.4, "acidity": "basic"}, - "n" : {"pka_value": 8.0, "acidity": "basic"}, - "c" : {"pka_value": 3.6, "acidity": "acidic"}} - -pmb.define_peptide(name="example_pep", - sequence=sequence, - model="1beadAA") - -if load_pka_set_from_file: - pka_set=None - pmb.load_pka_set(filename=path_to_pka) - print('pka_set stored in pyMBE: ', pmb.get_pka_set()) -else: - pka_set=custom_pka_set - -# pka_set is an optional argument, if it is not provided sugar will use the one stored in pmb.pka_set - -Z_HH = pmb.calculate_HH(molecule_name="example_pep", - pH_list=pH_values, - pka_set=pka_set) - -# Plot - -plt.figure(figsize=[11, 9]) -plt.suptitle('Peptide sequence: '+sequence) -plt.plot(pH_values, Z_HH, "-k", label='Henderson-Hasselbach') -plt.axhline(y=0.0, color="gray", linestyle="--") -plt.xlabel('pH') -plt.ylabel('Charge of the peptide / e') -plt.legend() - -plt.show() diff --git a/samples/plot_peptide.py b/samples/plot_peptide.py new file mode 100644 index 0000000..70e04c3 --- /dev/null +++ b/samples/plot_peptide.py @@ -0,0 +1,104 @@ +# +# Copyright (C) 2024 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Plots the charge of a peptide with a given sequence using either a custom or reference set of pKa values + +import numpy as np +import matplotlib.pyplot as plt +import argparse +import pandas as pd +# Create an instance of pyMBE library +import pyMBE +pmb = pyMBE.pymbe_library(seed=42) + +parser = argparse.ArgumentParser(description='Plots the titration data from peptide_mixture_grxmc_ideal.py and the corresponding analytical solution.') +parser.add_argument('--sequence', + type=str, + default= 'EEEEDDDD', + help='sequence of the peptide') +parser.add_argument('--path_to_data', + type=str, + required= False, + default="samples/time_series/peptide/analyzed_data.csv", + help='path to the analyzed data') +parser.add_argument('--output', + type=str, + required= False, + default="time_series/peptide", + help='output directory') +parser.add_argument('--mode', + type=str, + required= False, + default="plot", + help='mode to execute the script available options are: store_HH (stores the analytical HH solution, used for testing) and plot (produces the plots)') +args = parser.parse_args() + +valid_modes = ["plot","store_HH"] +if args.mode not in valid_modes: + raise ValueError(f"mode {args.mode} is not supported, supported modes are {valid_modes}. Please check the docs for more information.") + +# Define peptide parameters +sequence = args.sequence +# Define the peptide in the pyMBE dataframe and load the pka set +# This is necesary to calculate the analytical solution from the Henderson-Hasselbach equation +peptide = 'generic_peptide' +pmb.define_peptide (name=peptide, + sequence=sequence, + model="1beadAA") # not really relevant for plotting +path_to_pka=pmb.get_resource("parameters/pka_sets/Hass2015.json") +pmb.load_pka_set(path_to_pka) + +# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation +if args.mode == "plot": + pH_range_HH = np.linspace(2, 12, num=100) +elif args.mode == "store_HH": + pH_range_HH = [2,4,5,6] +Z_HH = pmb.calculate_HH(molecule_name=peptide, + pH_list=pH_range_HH) + +if args.mode == "plot": + # Read the analyzed data produced with peptide_mixture_grxmc_ideal + time_series_folder_path=pmb.get_resource(args.path_to_data) + analyzed_data = pd.read_csv(time_series_folder_path, header=[0,1]) + + # Plot the results + fig, ax = plt.subplots(figsize=(10, 7)) + ax.errorbar(analyzed_data["pH"]["value"], + analyzed_data["mean"]["charge"], + yerr=analyzed_data["err_mean"]["charge"], + fmt = 'o', + capsize=3, + color="red", + label='peptide 1 (sim)') + + ax.plot(pH_range_HH, + Z_HH, + "--r", + label='peptide 1 (Theory: HH)') + + plt.legend() + plt.xlabel('pH') + plt.ylabel('Net charge of the peptide / e') + plt.show() + plt.close() + +elif args.mode == "store_HH": + HH_data=pd.DataFrame({"pH": pH_range_HH, + "Z_HH": Z_HH,}) + HH_data.to_csv(f"{args.output}/HH_data.csv", + index=False) diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake index 8012957..12e4fbb 100644 --- a/testsuite/CTestTestfile.cmake +++ b/testsuite/CTestTestfile.cmake @@ -39,13 +39,13 @@ function(pymbe_add_test) endfunction() # functional tests, e.g. long simulations and ensemble averages +pymbe_add_test(PATH test_peptide_sample.py LABELS long THREADS 2) +pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long THREADS 2) +pymbe_add_test(PATH cph_ideal_tests.py LABELS long) +pymbe_add_test(PATH gcmc_tests.py LABELS long) pymbe_add_test(PATH globular_protein_tests.py LABELS long beyer2024 THREADS 2) pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2) pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024) -pymbe_add_test(PATH samples_tests.py LABELS long THREADS 2) -pymbe_add_test(PATH cph_ideal_tests.py LABELS long) -pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long) -pymbe_add_test(PATH gcmc_tests.py LABELS long) # unit tests pymbe_add_test(PATH serialization_test.py) diff --git a/testsuite/grxmc_ideal_tests.py b/testsuite/grxmc_ideal_tests.py index d31b148..8402b9a 100644 --- a/testsuite/grxmc_ideal_tests.py +++ b/testsuite/grxmc_ideal_tests.py @@ -16,42 +16,90 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +# Tests that samples/peptide_mixture_grxmc_ideal.py, analysis.py, plot_peptide_mixture_grxmc_ideal.py work properly # Import pyMBE and other libraries -import pyMBE import sys import subprocess import numpy as np +import sys +import pathlib +import tempfile +import subprocess +import multiprocessing import pandas as pd +import unittest as ut -# Create an instance of pyMBE library -pmb = pyMBE.pymbe_library(seed=42) -script_path = pmb.get_resource("samples/peptide_mixture_grxmc_ideal.py") -data_path = pmb.get_resource("samples/data_peptide_grxmc.csv") - -print("*** Grand reaction (G-RxMC) implementation tests ***\n") -print("*** Test that our implementation of the original G-RxMC method reproduces the Henderson-Hasselbalch equation corrected with the Donnan potential (HH+Don) for an ideal mixture of peptides ***") +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "samples" / "time_series" / "peptide_mixture_grxmc_ideal" +sample_path = root / "samples" / "peptide_mixture_grxmc_ideal.py" +analysis_path = root / "samples" / "analyze_time_series.py" +ideal_path = root / "samples" / "plot_peptide_mixture_grxmc_ideal.py" +test_pH_values=[2,5,7,10,12] +rtol=0.01 # relative tolerance +atol=0.05 # absolute tolerance -run_command = [sys.executable, script_path, "--mode", "standard", "--test"] -subprocess.check_output(run_command) +def kernel(pH_value,temp_dir_path,mode): + """ + Runs a set of tests for a given peptide sequence. -data = pd.read_csv(data_path) -# Check if charges agree -np.testing.assert_allclose(data["Z_sim"], data["Z_HH_Donnan"], rtol=0.01, atol=0.05) -# Check if partition coefficients agree -np.testing.assert_allclose(data["xi_sim"], data["xi_HH_Donnan"], rtol=0.1, atol=0.1) + Args: + pH_value(`float`): pH of the media. + temp_dir_path(`str`): path of the folder were to output the data. + mode(`str`): mode for the setup of the grxmc mode, supported modes are "standard" and "unified" + """ + run_command=[sys.executable, sample_path, "--mode", mode, + "--pH", str(pH_value), "--output", temp_dir_path, "--test"] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + return -print("*** Test passed ***\n") +def analyze_and_test_data(temp_dir_path): + """ + Analyzes the data and checks that is consistent against the HH analytical result. + Args: + temp_dir_path(`str`): path of the folder were to output the data. + """ + # Analyze the data + run_command=[sys.executable, analysis_path, "--data_folder", temp_dir_path] + subprocess.check_output(run_command) + analyzed_data=pd.read_csv(temp_dir_path + "/analyzed_data.csv", header=[0, 1]) + # Produce the ideal data + run_command=[sys.executable, ideal_path, "--output", temp_dir_path, "--mode", "store_HH"] + subprocess.check_output(run_command) + HH_data=pd.read_csv(temp_dir_path + "/HH_data.csv") + np.testing.assert_allclose(np.sort(analyzed_data["mean"]["charge_peptide1"].to_numpy()), + np.sort(HH_data["Z_HH_peptide1"].to_numpy()), + rtol=rtol, + atol=atol) + np.testing.assert_allclose(np.sort(analyzed_data["mean"]["charge_peptide2"].to_numpy()), + np.sort(HH_data["Z_HH_peptide2"].to_numpy()), + rtol=rtol, + atol=atol) + np.testing.assert_allclose(np.sort(analyzed_data["mean"]["xi_plus"].to_numpy()), + np.sort(HH_data["xi_HH"].to_numpy()), + rtol=rtol*5, + atol=atol*5) -print("*** Test that our implementation of the G-RxMC method with unified ion types reproduces HH+Don for an ideal mixture of peptides ***") +class Test(ut.TestCase): + def test_standard_grxmc(self): + temp_dir=tempfile.TemporaryDirectory() + mode="standard" + N_cases=len(test_pH_values) + with multiprocessing.Pool(processes=2) as pool: + pool.starmap(kernel, zip(test_pH_values,[temp_dir.name]*N_cases,[mode]*N_cases), chunksize=2)[0] + analyze_and_test_data(temp_dir_path=temp_dir.name) + temp_dir.cleanup() + def test_unified_grxmc(self): + temp_dir=tempfile.TemporaryDirectory() + mode="unified" + N_cases=len(test_pH_values) + with multiprocessing.Pool(processes=2) as pool: + pool.starmap(kernel, zip(test_pH_values,[temp_dir.name]*N_cases,[mode]*N_cases), chunksize=2)[0] + analyze_and_test_data(temp_dir_path=temp_dir.name) + temp_dir.cleanup() -run_command = [sys.executable, script_path, "--mode", "unified", "--test"] -subprocess.check_output(run_command) +if __name__ == "__main__": + ut.main() -data = pd.read_csv(data_path) -# Check if charges agree -np.testing.assert_allclose(data["Z_sim"], data["Z_HH_Donnan"], rtol=0.01, atol=0.05) -# Check if partition coefficients agree -np.testing.assert_allclose(data["xi_sim"], data["xi_HH_Donnan"], rtol=0.1, atol=0.1) -print("*** Test passed ***\n") diff --git a/testsuite/peptide_tests_data/peptide_sample.csv b/testsuite/peptide_tests_data/peptide_sample.csv deleted file mode 100644 index a7ead6b..0000000 --- a/testsuite/peptide_tests_data/peptide_sample.csv +++ /dev/null @@ -1,4 +0,0 @@ -sequence,pH,time,mean,err_mean,n_eff,tau_int -value,value,value,charge,charge,charge,charge -EEEEDDDD,7,series,-7.648351648351649,0.1255297416924878,23.09080597201252,1.9704812406795775 -EEEEDDDD,3,series,-0.6263736263736264,0.1103921181655937,28.535136224326788,1.594525417447734 diff --git a/testsuite/samples_tests.py b/testsuite/samples_tests.py deleted file mode 100644 index e6e758c..0000000 --- a/testsuite/samples_tests.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (C) 2024 pyMBE-dev team -# -# This file is part of pyMBE. -# -# pyMBE is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# pyMBE is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# Tests that samples/peptide.py and analysis.py work properly - -import sys -import pathlib -import tempfile -import subprocess -import multiprocessing -import pandas as pd -import unittest as ut - -root = pathlib.Path(__file__).parent.parent.resolve() -data_root = root / "samples" / "time_series" / "peptide" -sample_path = root / "samples" / "peptide.py" -analysis_path = root / "samples" / "analysis.py" -reference_path = root / "testsuite" / "peptide_tests_data" -sequences=["EEEEDDDD"] -test_pH_values=[3,7] -mode = "save" - -def kernel(sequence): - """ - Runs a set of tests for a given peptide sequence. - - Args: - sequence(`str`): Amino acid sequence of the peptide. - """ - with tempfile.TemporaryDirectory() as time_series_path: - for pH in test_pH_values: - print(f"pH = {pH}") - run_command=[sys.executable, sample_path, "--sequence", sequence, - "--pH", str(pH), "--output", time_series_path] - print(subprocess.list2cmdline(run_command)) - subprocess.check_output(run_command) - run_command=[sys.executable, analysis_path, "--data_folder", time_series_path] - subprocess.check_output(run_command) - analyzed_data=pd.read_csv(time_series_path + "/analyzed_data.csv", header=[0, 1]) - # Save data for future testing - if mode == "save": - analyzed_data.to_csv(reference_path / "peptide_sample.csv", index=False) - else: - assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']" - - - return analyzed_data - - -class Test(ut.TestCase): - - def test_peptide(self): - with multiprocessing.Pool(processes=2) as pool: - analyzed_data=pool.map(kernel, sequences, chunksize=2)[0] - rtol=0.1 # relative tolerance - atol=0.5 # absolute tolerance - reference_data=pd.read_csv(reference_path / "peptide_sample.csv", header=[0, 1]) - pd.testing.assert_series_equal (analyzed_data[("mean","charge")], - reference_data[("mean","charge")], - rtol=rtol, - atol=atol) - print(analyzed_data[("mean","charge")]) -if __name__ == "__main__": - ut.main() \ No newline at end of file diff --git a/testsuite/test_peptide_sample.py b/testsuite/test_peptide_sample.py new file mode 100644 index 0000000..ad38624 --- /dev/null +++ b/testsuite/test_peptide_sample.py @@ -0,0 +1,84 @@ +# +# Copyright (C) 2024 pyMBE-dev team +# +# This file is part of pyMBE. +# +# pyMBE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# pyMBE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +# Tests that samples/peptide.py and analysis.py work properly + +import sys +import pathlib +import tempfile +import subprocess +import multiprocessing +import pandas as pd +import unittest as ut +import numpy as np + +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "samples" / "time_series" / "peptide_mixture_grxmc_ideal" +sample_path = root / "samples" / "peptide.py" +analysis_path = root / "samples" / "analyze_time_series.py" +ideal_path = root / "samples" / "plot_peptide.py" +test_pH_values=[2,4,5,6] +rtol=0.01 # relative tolerance +atol=0.05 # absolute tolerance + +def kernel(pH_value,temp_dir_path): + """ + Runs a set of tests for a given peptide sequence. + + Args: + pH_value(`float`): pH of the media. + temp_dir_path(`str`): path of the folder were to output the data. + """ + run_command=[sys.executable, sample_path, + "--pH", str(pH_value), "--output", temp_dir_path, "--test"] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + return + +def analyze_and_test_data(temp_dir_path): + """ + Analyzes the data and checks that is consistent against the HH analytical result. + + Args: + temp_dir_path(`str`): path of the folder were to output the data. + """ + # Analyze the data + run_command=[sys.executable, analysis_path, "--data_folder", temp_dir_path] + subprocess.check_output(run_command) + analyzed_data=pd.read_csv(temp_dir_path + "/analyzed_data.csv", header=[0, 1]) + # Produce the ideal data + run_command=[sys.executable, ideal_path, "--output", temp_dir_path, "--mode", "store_HH"] + subprocess.check_output(run_command) + HH_data=pd.read_csv(temp_dir_path + "/HH_data.csv") + np.testing.assert_allclose(np.sort(analyzed_data["mean"]["charge"].to_numpy()), + np.sort(HH_data["Z_HH"].to_numpy()), + rtol=rtol, + atol=atol) + +class Test(ut.TestCase): + def test_peptide(self): + temp_dir=tempfile.TemporaryDirectory() + N_cases=len(test_pH_values) + with multiprocessing.Pool(processes=2) as pool: + pool.starmap(kernel, zip(test_pH_values,[temp_dir.name]*N_cases), chunksize=2)[0] + analyze_and_test_data(temp_dir_path=temp_dir.name) + temp_dir.cleanup() + +if __name__ == "__main__": + ut.main() \ No newline at end of file