From ab716faea5e021dab2ca429803594b86409ccff9 Mon Sep 17 00:00:00 2001 From: blancoapa Date: Mon, 4 Mar 2024 19:13:50 +0100 Subject: [PATCH] Add CI test for all peptide cases --- Makefile | 18 +- lib/analysis.py | 73 ++++--- samples/Beyer2024/create_paper_data.py | 28 +-- samples/Beyer2024/peptide.py | 9 +- testsuite/LYS_ASP_peptide.py | 193 ------------------ testsuite/peptide_tests.py | 76 +++++++ testsuite/peptide_tests_data/EEEEEHHHHH.csv | 5 + testsuite/peptide_tests_data/KKKKKDDDDD.csv | 5 + .../nDSHAKRHHGYKRKFHEKHHSHRGYc.csv | 5 + 9 files changed, 141 insertions(+), 271 deletions(-) delete mode 100644 testsuite/LYS_ASP_peptide.py create mode 100644 testsuite/peptide_tests.py create mode 100644 testsuite/peptide_tests_data/EEEEEHHHHH.csv create mode 100644 testsuite/peptide_tests_data/KKKKKDDDDD.csv create mode 100644 testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv diff --git a/Makefile b/Makefile index 308b15c..ee2836a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,6 @@ .PHONY: sample .PHONY: visual .PHONY: clean -.PHONY: tests .PHONY: testsuite .PHONY: docs @@ -12,7 +11,7 @@ docs: pdoc ./pyMBE.py -o ./docs --docformat google testsuite: - ${ESPResSo_build_path}/pypresso testsuite/LYS_ASP_peptide.py + ${ESPResSo_build_path}/pypresso testsuite/peptide_tests.py sample: ${ESPResSo_build_path}/pypresso sample_scripts/peptide_simulation_example.py @@ -22,17 +21,4 @@ visual: vmd -e visualization.tcl tutorial: - ${ESPResSo_build_path}/ipypresso notebook sugar_tutorial.ipynb - -tests_peptide: - ${ESPResSo_build_path}/pypresso tests/LYS_ASP_peptide.py - ${ESPResSo_build_path}/pypresso tests/GLU_HIS_peptide.py - ${ESPResSo_build_path}/pypresso tests/histatin5_peptide.py - -tests_globular_protein: - python3 tests/run_test_protein.py --pdb_code 1beb --run_command "${ESPResSo_build_path}/pypresso sample_scripts/globular_protein.py --pdb 1beb --path_to_cg reference_parameters/coarse_grained_structures/1beb.vtf" - python3 tests/run_test_protein.py --pdb_code 1f6s --run_command "${ESPResSo_build_path}/pypresso sample_scripts/globular_protein.py --pdb 1f6s --metal_ion_name Ca --metal_ion_charge 2 --path_to_cg reference_parameters/coarse_grained_structures/1f6s.vtf" - -tests: - make tests_peptide - make tests_globular_protein + ${ESPResSo_build_path}/ipypresso notebook sugar_tutorial.ipynb \ No newline at end of file diff --git a/lib/analysis.py b/lib/analysis.py index 956688d..f2ed1c5 100644 --- a/lib/analysis.py +++ b/lib/analysis.py @@ -24,6 +24,39 @@ def add_data_to_df(df, data_dict, index): index=index)]) return updated_df +def analyze_time_series(path_to_datafolder): + """ + Analyzes all time series stored in `path_to_datafolder` using the block binning method. 
+ + Args: + path_to_datafolder(`str`): path to the folder with the files with the time series + + Returns: + (`obj`): pandas dataframe with + + """ + data=pd.DataFrame() + with os.scandir(path_to_datafolder) as subdirectory: + # Gather all data + for subitem in subdirectory: + if subitem.is_file(): + if 'time_series' in subitem.name: + # Get parameters from the file name + data_dict=get_params_from_dir_name(subitem.name.replace('_time_series.csv', '')) + # Get the observables for binning analysis + time_series_data=read_csv_file(path=f"{path_to_datafolder}/{subitem.name}") + analyzed_data=block_analyze(full_data=time_series_data) + value_list=[] + index_list=[] + for key in data_dict.keys(): + value_list.append(data_dict[key]) + index_list.append((key,)) + analyzed_data = pd.concat([pd.Series(value_list, index=index_list), analyzed_data]) + data = add_data_to_df(df=data, + data_dict=analyzed_data.to_dict(), + index=[len(data)]) + return data + def do_binning_analysis(list_time_series_df, frac_data_to_discard=0.3): """ Does a binning analysis of all Pandas DataFrame objects in `list_time_series_df`. @@ -77,9 +110,6 @@ def merge_time_series_dfs(list_time_series_df,frac_data_to_discard=0,rescale_tim binning_df["time"]+=index*binning_df.shape[0]*dt list_time_series_df[index]=binning_df - - - # Join all the dataframes for binning analysis gathered_binning_df=pd.concat(list_time_series_df) @@ -154,8 +184,6 @@ def split_dataframe(df,n_blocks): # Blocks of size 2 (s2) = df.shape[0]//n_blocks - - n_blocks_s2= n_blocks - n_blocks_s1 block_size_s2=df.shape[0]//n_blocks blocks+=split_dataframe_in_equal_blocks(df=df, start_row=n_blocks_s1*block_size_s1, @@ -173,29 +201,22 @@ def get_time_series_from_average_df(pd_series, label): Returns: (`obj`): PandasDataFrame with the time series and their statistical error - - Authors: - - Pablo M. Blanco, Norwegian University of Science and Technology """ import numpy as np dist_dict={} expected_labels=["mean", "errmean", "nsamples"] - for data_str in pd_series[label]: clean_data_str=data_str.strip("{").strip("}") for data_set in clean_data_str.split("]"): data_list=data_set.split(":") if len(data_list) == 1: - continue - + continue # Parse data and label data=list(data_list[1][2:].split(",")) data=np.array([float(x) for x in data]) - clean_label=data_list[0].strip("'").strip(",").strip() label_sts=clean_label.split("_")[-1] - label_qty=clean_label[:-len(label_sts)-1].strip("'") - + label_qty=clean_label[:-len(label_sts)-1].strip("'") if label_sts not in expected_labels: raise ValueError(f"Error while parsing the df, found label for stats {label_sts}") @@ -203,10 +224,7 @@ def get_time_series_from_average_df(pd_series, label): dist_dict[label_qty][label_sts]=data else: dist_dict[label_qty]={} - dist_dict[label_qty][label_sts]=data - - - + dist_dict[label_qty][label_sts]=data return pd.DataFrame.from_dict(dist_dict) @@ -224,13 +242,6 @@ def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1, columns Returns: `result`: pandas dataframe with the mean (mean), statistical error (err_mean), number of effective samples (n_eff) and correlation time (tau_int) of each observable. - - Note: - This function should not be modified without consulting first its authors. - - Authors: - - Peter Kosovan, Charles University - - Pablo M. 
Blanco, Norwegian University of Science and Technology """ dt = get_dt(full_data) # check that the data was stored with the same time interval dt @@ -255,8 +266,6 @@ def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1, columns print(f"n_blocks b = {n_blocks}, block_size k = {block_size}") # calculate the mean per each block - - series_data = pd.Series(data.iat[0, 0], index=data.columns) blocks = split_dataframe(df=data, n_blocks=n_blocks) @@ -271,9 +280,10 @@ def block_analyze(full_data, n_blocks=16, time_col = "time", equil=0.1, columns err_mean = np.sqrt(var_blocks/n_blocks) # standard error of the mean by eq.(37) of Janke tau_int = dt*block_size * var_blocks / var_all /2.# eq.(38) of Janke n_eff = n_samples / (2*tau_int/dt) # effective number of samples in the whole simulation using eq.(28) of Janke + # concatenate the observables and atribute a key for each (unique index) - result = pd.concat( [ mean, err_mean, n_eff, tau_int], keys= [ "mean", "err_mean", "n_eff", "tau_int" ]) - result = pd.concat( [ pd.Series({"n_blocks":n_blocks,"block_size":block_size}), result] ) + result = pd.concat( [ mean, err_mean, n_eff, tau_int], keys= [ "mean", "err_mean", "n_eff", "tau_int" ], join="inner") + result = pd.concat( [ pd.Series([n_blocks,block_size], index=[('n_blocks',),('block_size',)]), result]) return result def get_dt(data): @@ -373,7 +383,6 @@ def create_histogram_df_from_distribution_list(distribution_list, start, end, nb cnt+=5000 return pd.DataFrame.from_dict(dict_hist) - def find_index_with_value_in_df(df,column_name, value, tol=0.01): """ Finds the index in the pandas DataFrame `df` with a column `column_name` and a row `value`. @@ -386,7 +395,6 @@ def find_index_with_value_in_df(df,column_name, value, tol=0.01): Returns: index (int): Index found. - """ index = np.where(abs(df[column_name]-value)/value < tol) return index[0] @@ -403,9 +411,6 @@ def built_output_name(input_dict): Note: The standard formatting rule is parametername1-parametervalue1_parametername2-parametervalue2 - - Authors: - - Pablo M. Blanco, Norwegian University of Science and Technology (NTNU) """ output_name="" for label in input_dict: diff --git a/samples/Beyer2024/create_paper_data.py b/samples/Beyer2024/create_paper_data.py index da3d453..1779b6e 100644 --- a/samples/Beyer2024/create_paper_data.py +++ b/samples/Beyer2024/create_paper_data.py @@ -40,7 +40,6 @@ raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") ## Peptide plots (Fig. 
6) - labels_fig6=["6a", "6b", "6c"] if fig_label in labels_fig6: @@ -54,40 +53,19 @@ else: raise RuntimeError() pH_range = np.linspace(2, 12, num=21) - for pH in pH_range: run_command=f"python3 {script_path} --sequence {sequence} --pH {pH} --mode {mode}" print(run_command) os.system(run_command) -# Read all files in the subdir -data_files=[] +# Analyze all time series time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series") +data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path) -data=pd.DataFrame() - -with os.scandir(time_series_folder_path) as subdirectory: - # Gather all data - for subitem in subdirectory: - if subitem.is_file(): - if 'time_series' in subitem.name: - # Get parameters from the file name - data_dict=analysis.get_params_from_dir_name(subitem.name.replace('_time_series.csv', '')) - file_data=pd.DataFrame(data_dict, index=[0]) - # Get the observables for binning analysis - time_series_data=analysis.read_csv_file(path=f"{time_series_folder_path}/{subitem.name}") - analyzed_data=analysis.block_analyze(full_data=time_series_data) - data_dict.update(analyzed_data.to_dict()) - data = analysis.add_data_to_df(df=data, - data_dict=data_dict, - index=[len(data)]) - for param in data_dict.keys(): - analyzed_data[param]=data_dict[param] - +# Store mean values and other statistics data_path=pmb.get_resource("samples/Beyer2024/")+"data" if not os.path.exists(data_path): os.makedirs(data_path) - data.to_csv(f"{data_path}/fig{fig_label}.csv") # Plot the data diff --git a/samples/Beyer2024/peptide.py b/samples/Beyer2024/peptide.py index 4c0e804..1fef3a7 100644 --- a/samples/Beyer2024/peptide.py +++ b/samples/Beyer2024/peptide.py @@ -19,6 +19,7 @@ # Create an instance of pyMBE library pmb = pyMBE.pymbe_library() +valid_modes=["short-run","long-run", "test"] parser = argparse.ArgumentParser(description='Script to run the peptide test cases for pyMBE') parser.add_argument('--sequence', type=str, @@ -35,7 +36,7 @@ parser.add_argument('--mode', type=str, default= "short-run", - help='sets for how long the simulation runs, valid modes are "short-run" and "long-run"') + help='sets for how long the simulation runs, valid modes are {valid_modes}') args = parser.parse_args() # Inputs @@ -46,7 +47,6 @@ mode=args.mode -valid_modes=["short-run","long-run"] if mode not in valid_modes: raise ValueError(f"Mode {mode} is not currently supported, valid modes are {valid_modes}") @@ -55,9 +55,12 @@ Nsamples = 1000 MD_steps_per_sample = 1000 -if mode == "long-run": +elif mode == "long-run": Nsamples = 5000 MD_steps_per_sample = 5000 +elif mode == "test": + Nsamples = 500 + MD_steps_per_sample = 700 SEED = 100 dt = 0.01 diff --git a/testsuite/LYS_ASP_peptide.py b/testsuite/LYS_ASP_peptide.py deleted file mode 100644 index 7eb42fc..0000000 --- a/testsuite/LYS_ASP_peptide.py +++ /dev/null @@ -1,193 +0,0 @@ -# Load espresso, pyMBE and other necessary libraries -import os -import sys -import inspect -import numpy as np -import pandas as pd -from tqdm import tqdm -import espressomd -from espressomd import interactions -from espressomd.io.writer import vtf -from espressomd import electrostatics - -# Create an instance of pyMBE library -import pyMBE -pmb = pyMBE.pymbe_library() - -# Load some functions from the handy_scripts library for convinience -from lib.handy_functions import setup_electrostatic_interactions -from lib.handy_functions import minimize_espresso_system_energy -from lib.handy_functions import setup_langevin_dynamics -from lib.analysis import block_analyze - -# 
The trajectories of the simulations will be stored using espresso built-up functions in separed files in the folder 'frames' -if not os.path.exists('./frames'): - os.makedirs('./frames') - -# Simulation parameters -pmb.set_reduced_units(unit_length=0.4*pmb.units.nm) -pH_range = np.linspace(2, 12, num=21) -Samples_per_pH = 36 -MD_steps_per_sample = 50 -steps_eq =int(Samples_per_pH/3) -N_samples_print = 10 # Write the trajectory every 100 samples -probability_reaction = 0.5 -SEED = 1 -dt = 0.01 -solvent_permitivity = 78.3 - -L = 25.513*pmb.units.nm - -# Peptide parameters -N_aminoacids = 5 -sequence = 'K'*N_aminoacids+'D'*N_aminoacids -model = '2beadAA' # Model with 2 beads per each aminoacid -pep_concentration = 1e-4 *pmb.units.mol/pmb.units.L - -# Solution parameters -cation_name = 'Na' -anion_name = 'Cl' -c_salt = 1e-2 * pmb.units.mol/ pmb.units.L - -# Define salt parameters - -pmb.define_particle( name=cation_name, q=1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) -pmb.define_particle( name=anion_name, q=-1, diameter=0.35*pmb.units.nm, epsilon=1*pmb.units('reduced_energy')) - -# Load peptide parametrization from Lunkad, R. et al. Molecular Systems Design & Engineering (2021), 6(2), 122-131. - -pmb.load_interaction_parameters (filename=pmb.get_resource('parameters/peptides/Lunkad2021.txt')) -pmb.load_pka_set (filename=pmb.get_resource('parameters/pka_sets/CRC1991.txt')) - -# Define the peptide on the pyMBE dataframe -pmb.define_peptide( name=sequence, sequence=sequence, model=model) - -# System parameters -volume = L**3 -N_peptide_chains = int ( volume * pmb.N_A * pep_concentration) -L = volume ** (1./3.) # Side of the simulation box -calculated_peptide_concentration = N_peptide_chains/(volume*pmb.N_A) - -# Create an instance of an espresso system -espresso_system = espressomd.System(box_l=[L.to('reduced_length').magnitude]*3) - -# Add all bonds to espresso system -pmb.add_bonds_to_espresso (espresso_system=espresso_system) - -# Create your molecules into the espresso system - -pmb.create_pmb_object (name=sequence, number_of_objects= N_peptide_chains,espresso_system=espresso_system) - - -# Create counterions for the peptide chains -pmb.create_counterions (object_name=sequence,cation_name=cation_name,anion_name=anion_name,espresso_system=espresso_system) -c_salt_calculated = pmb.create_added_salt(espresso_system=espresso_system,cation_name=cation_name,anion_name=anion_name,c_salt=c_salt) - - -#List of ionisible groups -basic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='basic')].name.to_list() -acidic_groups = pmb.df.loc[(~pmb.df['particle_id'].isna()) & (pmb.df['acidity']=='acidic')].name.to_list() -list_ionisible_groups = basic_groups + acidic_groups -total_ionisible_groups = len (list_ionisible_groups) - -print("The box length of your system is", L.to('reduced_length'), L.to('nm')) -print('The peptide concentration in your system is ', calculated_peptide_concentration.to('mol/L') , 'with', N_peptide_chains, 'peptides') -print('The ionisable groups in your peptide are ', list_ionisible_groups) - -# Setup the acid-base reactions of the peptide using the constant pH ensemble -RE, sucessfull_reactions_labels = pmb.setup_cpH(counter_ion=cation_name, constant_pH=2, SEED = SEED) -print('The acid-base reaction has been sucessfully setup for ', sucessfull_reactions_labels) - -# Setup espresso to track the each type defined in type_map -type_map = pmb.get_type_map() -types = list (type_map.values()) -espresso_system.setup_type_map( type_list = 
types) - -# Setup the non-interacting type for speeding up the sampling of the reactions -non_interacting_type = max(type_map.values())+1 -RE.set_non_interacting_type (type=non_interacting_type) -print('The non interacting type is set to ', non_interacting_type) - -# Setup the potential energy -pmb.setup_lj_interactions(espresso_system=espresso_system) -minimize_espresso_system_energy (espresso_system=espresso_system) -setup_electrostatic_interactions(units=pmb.units, - espresso_system=espresso_system, - kT=pmb.kT) -minimize_espresso_system_energy (espresso_system=espresso_system) - - -setup_langevin_dynamics (espresso_system=espresso_system, - kT = pmb.kT, - SEED = SEED, - time_step=dt, - tune_skin=False) - -espresso_system.cell_system.skin=0.4 - -# Save the initial state -with open('frames/trajectory1.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - -N_frame=0 -Z_pH=[] # List of the average global charge at each pH -Rg_pH=[] - -particle_id_list = pmb.get_particle_id_map(object_name=sequence)["all"] -first_peptide_id = min(particle_id_list) - -#Save the pyMBE dataframe in a CSV file -pmb.df.to_csv('df.csv') - -for index in (pbar := tqdm(range(len(pH_range)))): - # Sample list inicialization - pH_value=pH_range[index] - Z_sim=[] - Rg_sim=[] - Z_groups_time_series=[] - RE.constant_pH = pH_value - pbar.set_description(f"pH = {pH_value:2.1f}") - - # Inner loop for sampling each pH value - for step in range(Samples_per_pH+steps_eq): - - if np.random.random() > probability_reaction: - espresso_system.integrator.run(steps=MD_steps_per_sample) - else: - RE.reaction(reaction_steps=total_ionisible_groups) - - if ( step > steps_eq): - # Get peptide net charge - charge_dict=pmb.calculate_net_charge(espresso_system=espresso_system, - molecule_name=sequence) - Z_sim.append(charge_dict["mean"]) - # Get peptide radius of gyration - Rg = espresso_system.analysis.calc_rg(chain_start=first_peptide_id, number_of_chains=N_peptide_chains, chain_length=len(particle_id_list)) - Rg_value = pmb.units.Quantity(Rg[0], 'reduced_length') - Rg_nm = Rg_value.to('nm').magnitude - Rg_sim.append(Rg_nm) - - if (step % N_samples_print == 0) : - - N_frame+=1 - with open('frames/trajectory'+str(N_frame)+'.vtf', mode='w+t') as coordinates: - vtf.writevsf(espresso_system, coordinates) - vtf.writevcf(espresso_system, coordinates) - - - Z_pH.append(np.array(Z_sim)) - Rg_pH.append(Rg_sim) - - -# Calculate the ideal titration curve of the peptide with Henderson-Hasselbach equation -Z_HH = pmb.calculate_HH(object_name=sequence, - pH_list=pH_range) - -# Load the reference data -reference_file_Path = pmb.get_resource("testsuite/data/src/Lys-AspMSDE.csv") -reference_data = pd.read_csv(reference_file_Path) - -Z_ref = N_aminoacids*-1*reference_data['aaa']+N_aminoacids*reference_data['aab'] - -#np.testing.assert_allclose(np.copy(av_charge), Z_ref.to_numpy(), atol=2.5, rtol=0.) diff --git a/testsuite/peptide_tests.py b/testsuite/peptide_tests.py new file mode 100644 index 0000000..2399692 --- /dev/null +++ b/testsuite/peptide_tests.py @@ -0,0 +1,76 @@ +# Import pyMBE and other libraries +import pyMBE +from lib import analysis +import os +import numpy as np +import pandas as pd +from tqdm import tqdm + +# Template of the test + +def run_peptide_test(script_path,test_pH_values,sequence,rtol,atol): + """ + Runs a set of tests for a given peptide sequence. + + Args: + script_path(`str`): Path to the script to run the test. 
+ test_pH_values(`lst`): List of pH values to be tested. + sequence(`str`): Amino acid sequence of the peptide. + """ + # Get data folder + time_series_folder_path=pmb.get_resource(f"samples/Beyer2024/time_series") + # clean up data folder + os.system(f"rm {time_series_folder_path}/*") + print(f"Running tests for {sequence}") + for pH in (pbar := tqdm(test_pH_values)): + pbar.set_description(f"pH = {pH}") + run_command=f"python3 {script_path} --sequence {sequence} --pH {pH} --mode test" + print(run_command) + os.system(run_command) + # Analyze all time series + data=analysis.analyze_time_series(path_to_datafolder=time_series_folder_path) + # Get reference test data + data_path=pmb.get_resource(path="testsuite/peptide_tests_data") + ref_data=pd.read_csv(data_path+f"/{sequence}.csv", header=[0, 1], index_col=0) + # Check that the charge and radius of gyration are consistent + np.testing.assert_allclose(data["mean","charge"].to_numpy(), + ref_data["mean","charge"].to_numpy(), + rtol=rtol, + atol=atol) + np.testing.assert_allclose(data["mean","rg"].to_numpy(), + ref_data["mean","rg"].to_numpy(), + rtol=rtol, + atol=atol) + print(f"Test for {sequence} succesful") + +# Create an instance of pyMBE library +pmb = pyMBE.pymbe_library() + +script_path=pmb.get_resource(f"samples/Beyer2024/peptide.py") +test_pH_values=[3,7,11] +rtol=0.1 # relative tolerance +atol=0.5 # absolute tolerance + +# Run test for K_5-D_5 case +sequence="K"*5+"D"*5 +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) + +# Run test for E_5-H_5 case +sequence="E"*5+"H"*5 +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) + +# Run test for histatin-5 case +sequence="nDSHAKRHHGYKRKFHEKHHSHRGYc" +run_peptide_test(script_path=script_path, + test_pH_values=test_pH_values, + sequence=sequence, + rtol=rtol, + atol=atol) \ No newline at end of file diff --git a/testsuite/peptide_tests_data/EEEEEHHHHH.csv b/testsuite/peptide_tests_data/EEEEEHHHHH.csv new file mode 100644 index 0000000..a2c1b9b --- /dev/null +++ b/testsuite/peptide_tests_data/EEEEEHHHHH.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +nan,nan,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +7,EEEEEHHHHH,16.0,28.125,-3.46,2.2703462218107697,0.08485448511846783,0.011270236960158787,29.626400298799958,123.97658163444471,53.16204412785421,12.704012155120516 +3,EEEEEHHHHH,16.0,28.125,3.6155555555555554,2.2852353902531646,0.0659070291966048,0.00965865997785087,36.01484582149586,171.5617117303853,43.73196564107516,9.18037004963811 +11,EEEEEHHHHH,16.0,28.125,-5.0,2.376694528257385,0.0,0.009075495803215277,,180.56146365212345,,8.722791498127924 diff --git a/testsuite/peptide_tests_data/KKKKKDDDDD.csv b/testsuite/peptide_tests_data/KKKKKDDDDD.csv new file mode 100644 index 0000000..ad81080 --- /dev/null +++ b/testsuite/peptide_tests_data/KKKKKDDDDD.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +nan,nan,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +11,KKKKKDDDDD,16.0,28.125,-2.4394444444444443,2.2049612765357924,0.0715700444086051,0.012580800325214478,36.920026395901495,97.521880100548,42.65977448513195,16.150221862113444 +7,KKKKKDDDDD,16.0,28.125,0.0,2.0725310670590007,0.0,0.004655049123372929,,485.26077729437475,,3.2456775278971928 
+3,KKKKKDDDDD,16.0,28.125,2.7888888888888888,2.2437294343122227,0.10015636535927931,0.009715071564187647,23.660790449758085,160.02409606208622,66.56582346133663,9.842267750871764 diff --git a/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv b/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv new file mode 100644 index 0000000..677c803 --- /dev/null +++ b/testsuite/peptide_tests_data/nDSHAKRHHGYKRKFHEKHHSHRGYc.csv @@ -0,0 +1,5 @@ +pH,sequence,n_blocks,block_size,mean,mean,err_mean,err_mean,n_eff,n_eff,tau_int,tau_int +nan,nan,nan,nan,charge,rg,charge,rg,charge,rg,charge,rg +11,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,-1.191111111111111,4.063464427616705,0.05348160750422651,0.08851343132097834,251.94488535923495,72.1482798903042,6.251367229735849,21.83004227445118 +7,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,4.997777777777777,4.20850730205197,0.045342906001995156,0.11132315378572168,215.56745024938866,55.258072529405915,7.306297858105055,28.50262283752534 +3,nDSHAKRHHGYKRKFHEKHHSHRGYc,16.0,28.125,12.657777777777778,5.2393962478267655,0.060622215668095406,0.11362578274313505,161.98895235804122,60.20523413647287,9.722885277713772,26.160516151537188
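
For reference, the statistics that block_analyze reports for every observable (mean, err_mean, tau_int, n_eff) come from the standard block-averaging ("binning") method, eqs. (37), (38) and (28) of Janke, as noted in the code comments. The sketch below is a minimal, self-contained illustration of that estimator; the names are illustrative rather than part of the pyMBE API, and it simply discards the trailing samples instead of splitting them over two block sizes as lib/analysis.split_dataframe does.

import numpy as np

def block_average(samples, n_blocks=16, dt=1.0):
    """Mean, standard error, integrated autocorrelation time and effective
    sample count of a correlated time series, via block averaging."""
    samples = np.asarray(samples, dtype=float)
    n_samples = samples.size - samples.size % n_blocks    # drop the remainder
    block_size = n_samples // n_blocks
    block_means = samples[:n_samples].reshape(n_blocks, block_size).mean(axis=1)
    mean = block_means.mean()                              # global mean
    var_all = samples[:n_samples].var(ddof=1)              # variance of the raw data
    var_blocks = block_means.var(ddof=1)                   # variance of the block means
    err_mean = np.sqrt(var_blocks / n_blocks)              # eq. (37) of Janke
    tau_int = dt * block_size * var_blocks / var_all / 2.  # eq. (38) of Janke
    n_eff = n_samples / (2 * tau_int / dt)                 # eq. (28) of Janke
    return mean, err_mean, tau_int, n_eff

# Correlated data: the blocked error bar is much larger than the naive
# standard error of the mean, which is the point of the analysis.
rng = np.random.default_rng(42)
correlated = np.convolve(rng.normal(size=4500), np.ones(50) / 50, mode="valid")
print(block_average(correlated, n_blocks=16, dt=0.01))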
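
The reference files added under testsuite/peptide_tests_data/ store one row per tested pH value and use two header rows, so that pandas builds a (statistic, observable) MultiIndex over the columns; this is how peptide_tests.py reads them with header=[0, 1] and index_col=0 before comparing against a fresh run with np.testing.assert_allclose. A short usage sketch (the relative path assumes the repository root as the working directory):

import pandas as pd

ref_data = pd.read_csv("testsuite/peptide_tests_data/KKKKKDDDDD.csv",
                       header=[0, 1],   # two header rows -> MultiIndex columns
                       index_col=0)     # first column (pH) becomes the index
print(ref_data["mean", "charge"])       # mean net charge at each tested pH
print(ref_data["err_mean", "rg"])       # error of the mean radius of gyration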