From c48c0f6bf88c7c5bff37555e8b4745b93bfaa7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 29 Aug 2024 19:21:11 +0200 Subject: [PATCH] Use Python multiprocessing --- requirements.txt | 4 +- testsuite/CTestTestfile.cmake | 28 ++--- testsuite/globular_protein_tests.py | 101 ++++++++---------- testsuite/henderson_hasselbalch_tests.py | 87 ++++++--------- testsuite/peptide_tests.py | 101 ++++++++---------- .../weak_polyelectrolyte_dialysis_test.py | 69 +++++++----- 6 files changed, 178 insertions(+), 212 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8e0efd6..f07ecbb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,5 +5,7 @@ pint-pandas>=0.3 biopandas==0.5.1.dev0 scipy>=1.8.0 matplotlib>=3.5.1 +# soft dependencies to run the samples tqdm>=4.57.0 -cmake>=3.22.1 +# soft dependencies to run the testsuite +cmake>=3.22.1 # for CTest diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake index 316b168..166c796 100644 --- a/testsuite/CTestTestfile.cmake +++ b/testsuite/CTestTestfile.cmake @@ -25,19 +25,27 @@ cmake_path(GET CMAKE_CURRENT_SOURCE_FILE PARENT_PATH CMAKE_CURRENT_SOURCE_DIR) cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH CMAKE_SOURCE_DIR) function(pymbe_add_test) - cmake_parse_arguments(TEST "" "PATH;NAME" "LABELS;ARGUMENTS" ${ARGN}) - if(NOT DEFINED TEST_NAME) - cmake_path(GET TEST_PATH STEM TEST_NAME) - set(TEST_NAME ${TEST_NAME} PARENT_SCOPE) - endif() + cmake_parse_arguments(TEST "" "PATH;THREADS" "LABELS" ${ARGN}) + cmake_path(GET TEST_PATH STEM TEST_NAME) if(DEFINED ENV{COVERAGE} AND "$ENV{COVERAGE}" STREQUAL "1") list(APPEND PYTHON_ARGUMENTS "-m" "coverage" "run" "--parallel-mode" "--source=${CMAKE_SOURCE_DIR}") endif() - add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}" ${TEST_ARGUMENTS}) + add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}") set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 5) 
set_tests_properties(${TEST_NAME} PROPERTIES LABELS ${TEST_LABELS}) + if(DEFINED TEST_THREADS) + set_tests_properties(${TEST_NAME} PROPERTIES PROCESSORS ${TEST_THREADS}) + endif() endfunction() +# functional tests, e.g. long simulations and ensemble averages +pymbe_add_test(PATH globular_protein_tests.py LABELS long beyer2024 THREADS 2) +pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2) +pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024) +pymbe_add_test(PATH cph_ideal_tests.py LABELS long) +pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long) +pymbe_add_test(PATH gcmc_tests.py LABELS long) + # unit tests pymbe_add_test(PATH serialization_test.py) pymbe_add_test(PATH lj_tests.py) @@ -58,11 +66,3 @@ pymbe_add_test(PATH charge_number_map_tests.py) pymbe_add_test(PATH generate_coordinates_tests.py) pymbe_add_test(PATH reaction_methods_unit_tests.py) pymbe_add_test(PATH determine_reservoir_concentrations_unit_test.py) - -# functional tests, e.g. 
long simulations and ensemble averages -pymbe_add_test(PATH globular_protein_tests.py LABELS long) -pymbe_add_test(PATH peptide_tests.py LABELS long) -pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long) -pymbe_add_test(PATH cph_ideal_tests.py LABELS long) -pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long) -pymbe_add_test(PATH gcmc_tests.py LABELS long) diff --git a/testsuite/globular_protein_tests.py b/testsuite/globular_protein_tests.py index a646c26..b475e06 100644 --- a/testsuite/globular_protein_tests.py +++ b/testsuite/globular_protein_tests.py @@ -18,77 +18,66 @@ from lib import analysis import sys +import pathlib import tempfile import subprocess +import multiprocessing import numpy as np import pandas as pd import unittest as ut -import pathlib -class Test(ut.TestCase): - data_root = pathlib.Path(__file__).parent.resolve() / "globular_protein_tests_data" - samples_root = pathlib.Path(__file__).parent.parent.resolve() / "samples" +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "testsuite" / "globular_protein_tests_data" +script_path = root / "samples" / "Beyer2024" / "globular_protein.py" +test_pH_values = [2, 5, 7] +tasks = ["1beb", "1f6s"] +mode = "test" - def run_protein_test(self, script_path, test_pH_values, protein_pdb, rtol, atol,mode="test"): - """ - Runs a set of tests for a given protein pdb. - Args: - script_path(`str`): Path to the script to run the test. - test_pH_values(`lst`): List of pH values to be tested. - protein_pdb(`str`): PDB code of the protein. - """ - valid_modes=["test","save"] - assert mode in valid_modes, f"Mode {mode} not supported, valid modes: {valid_modes}" +def kernel(protein_pdb): + """ + Runs a set of tests for a given protein pdb. 
- print(f"Running tests for {protein_pdb}") - with tempfile.TemporaryDirectory() as time_series_path: - for pH in test_pH_values: - print(f"pH = {pH}") - run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH), - "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf", - "--mode", "test", "--no_verbose", "--output", time_series_path] - print(subprocess.list2cmdline(run_command)) - subprocess.check_output(run_command) - # Analyze all time series - data=analysis.analyze_time_series(path_to_datafolder=time_series_path, - filename_extension="_time_series.csv") + Args: + protein_pdb(`str`): PDB code of the protein. + """ + with tempfile.TemporaryDirectory() as time_series_path: + for pH in test_pH_values: + print(f"pH = {pH}") + run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH), + "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf", + "--mode", "test", "--no_verbose", "--output", time_series_path] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + # Analyze all time series + data=analysis.analyze_time_series(path_to_datafolder=time_series_path, + filename_extension="_time_series.csv") + return (protein_pdb, data) - if mode == "test": - # Get reference test data - ref_data=pd.read_csv(self.data_root / f"{protein_pdb}.csv", header=[0, 1]) - # Check charge - test_charge=np.sort(data["mean","charge"].to_numpy()) - ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) - np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) - print(f"Test for {protein_pdb} was successful") - else: - assert mode == "save" - # Save data for future testing - data.to_csv(self.data_root / f"{protein_pdb}.csv", index=False) + +class Test(ut.TestCase): def test_globular_protein(self): - script_path = self.samples_root / "Beyer2024" / "globular_protein.py" - test_pH_values=[2,5,7] + with multiprocessing.Pool(processes=2) as pool: + results = dict(pool.map(kernel, 
tasks, chunksize=1)) + rtol=0.1 # relative tolerance atol=0.5 # absolute tolerance - - # Run test for 1BEB case - protein_pdb = "1beb" - self.run_protein_test(script_path=script_path, - test_pH_values=test_pH_values, - protein_pdb=protein_pdb, - rtol=rtol, - atol=atol) - - # Run test for 1F6S case - protein_pdb = "1f6s" - self.run_protein_test(script_path=script_path, - test_pH_values=test_pH_values, - protein_pdb=protein_pdb, - rtol=rtol, - atol=atol) + for protein_pdb, data in results.items(): + # Save data for future testing + if mode == "save": + data.to_csv(data_root / f"{protein_pdb}.csv", index=False) + continue + assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']" + with self.subTest(msg=f"Protein {protein_pdb}"): + # Get reference test data + ref_data=pd.read_csv(data_root / f"{protein_pdb}.csv", header=[0, 1]) + # Check charge + test_charge=np.sort(data["mean","charge"].to_numpy()) + ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) + np.testing.assert_allclose( + test_charge, ref_charge, rtol=rtol, atol=atol) if __name__ == "__main__": ut.main() diff --git a/testsuite/henderson_hasselbalch_tests.py b/testsuite/henderson_hasselbalch_tests.py index d9b0bcf..1ae451b 100644 --- a/testsuite/henderson_hasselbalch_tests.py +++ b/testsuite/henderson_hasselbalch_tests.py @@ -50,63 +50,38 @@ def test(self): sequence = sequence2, model = model) + with self.subTest(msg="Check Henderson-Hasselbalch equation"): + pH_range = np.linspace(2, 12, num=200) + Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", + pH_list = pH_range) + Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", + pH_list = pH_range) + + data_path = pmb.get_resource(path=self.data_root) + ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",") + np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) + np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:]) + + with self.subTest(msg="Check Henderson-Hasselbalch equation + Donnan"): + HH_Donnan_dict = 
pmb.calculate_HH_Donnan( + c_macro = {"peptide_1": pep1_concentration, + "peptide_2": pep2_concentration}, + c_salt = c_salt, + pH_list = pH_range) + + ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",") + np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:]) + np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:]) + + with self.subTest(msg="Check HH and HH_Don are consistent"): + Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", + pH_list = HH_Donnan_dict["pH_system_list"]) + Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", + pH_list = HH_Donnan_dict["pH_system_list"]) + + np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"]) + np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"]) - print("*** Check that Henderson-Hasselbalch equation works correctly ***") - - # Calculate charge according to Henderson-Hasselbalch equation - pH_range = np.linspace(2, 12, num=200) - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", - pH_list = pH_range) - Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", - pH_list = pH_range) - - """ - with open(self.data_root / "HH.csv", "wb") as f: - np.savetxt(f, np.asarray(Z_HH_1).reshape(1,-1), delimiter=",") - np.savetxt(f, np.asarray(Z_HH_2).reshape(1,-1), delimiter=",") - """ - - data_path = pmb.get_resource(path=self.data_root) - ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",") - np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:]) - np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:]) - - print("*** Test passed ***\n") - - - print("*** Check that Henderson-Hasselbalch equation + Donnan works correctly ***") - - HH_Donnan_dict = pmb.calculate_HH_Donnan( - c_macro = {"peptide_1": pep1_concentration, - "peptide_2": pep2_concentration}, - c_salt = c_salt, - pH_list = pH_range) - - """ - with open(self.data_root / "HH_Donnan.csv", "wb") as f: - 
np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_1"]).reshape(1,-1), delimiter=",") - np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_2"]).reshape(1,-1), delimiter=",") - """ - - ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",") - np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:]) - np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:]) - - print("*** Test passed ***\n") - - - print("*** Check that HH and HH_Don are consistent ***") - - Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1", - pH_list = HH_Donnan_dict["pH_system_list"]) - Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2", - pH_list = HH_Donnan_dict["pH_system_list"]) - - np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"]) - np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"]) - - - print("*** Test passed***") if __name__ == "__main__": ut.main() diff --git a/testsuite/peptide_tests.py b/testsuite/peptide_tests.py index 46acf87..5340476 100644 --- a/testsuite/peptide_tests.py +++ b/testsuite/peptide_tests.py @@ -16,87 +16,72 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-# Import pyMBE and other libraries import sys +import pathlib import tempfile import subprocess -import pyMBE +import multiprocessing from lib import analysis import numpy as np import pandas as pd +import unittest as ut -# Template of the test +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "testsuite" / "peptide_tests_data" +script_path = root / "samples" / "Beyer2024" / "peptide.py" +test_pH_values = [3, 7, 11] +tasks = [ + "K"*5+"D"*5, # K_5-D_5 case + "E"*5+"H"*5, # E_5-H_5 case + "nDSHAKRHHGYKRKFHEKHHSHRGYc", # histatin-5 case, slow simulation +] +mode = "test" -def run_peptide_test(script_path,test_pH_values,sequence,rtol,atol,mode="test"): +def kernel(sequence): """ Runs a set of tests for a given peptide sequence. Args: - script_path(`str`): Path to the script to run the test. - test_pH_values(`lst`): List of pH values to be tested. sequence(`str`): Amino acid sequence of the peptide. """ - valid_modes=["test","save"] - assert mode in valid_modes, f"Mode {mode} not supported, valid modes: {valid_modes}" - - print(f"Running tests for {sequence}") with tempfile.TemporaryDirectory() as time_series_path: for pH in test_pH_values: print(f"pH = {pH}") - run_command=[sys.executable, script_path, "--sequence", sequence, "--pH", str(pH), "--mode", "test", "--no_verbose", "--output", time_series_path] + run_command=[sys.executable, script_path, "--sequence", sequence, + "--pH", str(pH), "--mode", "test", "--no_verbose", + "--output", time_series_path] print(subprocess.list2cmdline(run_command)) subprocess.check_output(run_command) # Analyze all time series data=analysis.analyze_time_series(path_to_datafolder=time_series_path) - data_path=pmb.get_resource(path="testsuite/peptide_tests_data") - if mode == "test": - # Get reference test data - ref_data=pd.read_csv(data_path+f"/{sequence}.csv", header=[0, 1]) - # Check charge - test_charge=np.sort(data["mean","charge"].to_numpy()) - ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) - 
np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) - # Check rg - test_rg=np.sort(data["mean","rg"].to_numpy()) - ref_rg=np.sort(ref_data["mean","rg"].to_numpy()) - np.testing.assert_allclose(test_rg, ref_rg, rtol=rtol, atol=atol) - print(f"Test for {sequence} was successful") - elif mode == "save": - # Save data for future testing - data.to_csv(f"{data_path}/{sequence}.csv", index=False) - else: - raise RuntimeError - -# Create an instance of pyMBE library -pmb = pyMBE.pymbe_library(seed=42) + return (sequence, data) -script_path=pmb.get_resource("samples/Beyer2024/peptide.py") -test_pH_values=[3,7,11] -rtol=0.1 # relative tolerance -atol=0.5 # absolute tolerance -# Run test for K_5-D_5 case -sequence="K"*5+"D"*5 +class Test(ut.TestCase): -run_peptide_test(script_path=script_path, - test_pH_values=test_pH_values, - sequence=sequence, - rtol=rtol, - atol=atol) + def test_peptide(self): + with multiprocessing.Pool(processes=2) as pool: + results = dict(pool.map(kernel, tasks, chunksize=2)) -# Run test for E_5-H_5 case -sequence="E"*5+"H"*5 + rtol=0.1 # relative tolerance + atol=0.5 # absolute tolerance + for sequence, data in results.items(): + # Save data for future testing + if mode == "save": + data.to_csv(data_root / f"{sequence}.csv", index=False) + continue + assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']" + with self.subTest(msg=f"Sequence {sequence}"): + # Get reference test data + ref_data=pd.read_csv(data_root / f"{sequence}.csv", header=[0, 1]) + # Check charge + test_charge=np.sort(data["mean","charge"].to_numpy()) + ref_charge=np.sort(ref_data["mean","charge"].to_numpy()) + np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) + # Check rg + test_rg=np.sort(data["mean","rg"].to_numpy()) + ref_rg=np.sort(ref_data["mean","rg"].to_numpy()) + np.testing.assert_allclose(test_rg, ref_rg, rtol=rtol, atol=atol) -run_peptide_test(script_path=script_path, - 
test_pH_values=test_pH_values, - sequence=sequence, - rtol=rtol, - atol=atol) - -# Run test for histatin-5 case -sequence="nDSHAKRHHGYKRKFHEKHHSHRGYc" -run_peptide_test(script_path=script_path, - test_pH_values=test_pH_values, - sequence=sequence, - rtol=rtol, - atol=atol) +if __name__ == "__main__": + ut.main() diff --git a/testsuite/weak_polyelectrolyte_dialysis_test.py b/testsuite/weak_polyelectrolyte_dialysis_test.py index f64bca5..d3a8f73 100644 --- a/testsuite/weak_polyelectrolyte_dialysis_test.py +++ b/testsuite/weak_polyelectrolyte_dialysis_test.py @@ -16,42 +16,57 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -# Import pyMBE and other libraries -import pyMBE from lib import analysis import sys +import pathlib import tempfile import subprocess import numpy as np import pandas as pd +import unittest as ut -# Create an instance of pyMBE library -pmb = pyMBE.pymbe_library(seed=42) +root = pathlib.Path(__file__).parent.parent.resolve() +data_root = root / "testsuite" / "weak_polyelectrolyte_dialysis_test_data" +script_path = root / "samples" / "Beyer2024" / "weak_polyelectrolyte_dialysis.py" -script_path=pmb.get_resource("samples/Beyer2024/weak_polyelectrolyte_dialysis.py") test_pH_values=[3,5,7,9] c_salt_res=0.01 c_mon_sys=0.435 pKa_value=4.0 -rtol=0.1 # relative tolerance -atol=0.05 # absolute tolerance - -print("*** Running test for weak polyelectrolyte dialysis with G-RxMC (interacting). 
***") -with tempfile.TemporaryDirectory() as time_series_path: - for pH in test_pH_values: - print(f"pH = {pH}") - run_command=[sys.executable, script_path, "--c_salt_res", str(c_salt_res), "--c_mon_sys", str(c_mon_sys), "--pKa_value", str(pKa_value), "--pH_res", str(pH), "--mode", "test", "--output", time_series_path, "--no_verbose"] - print(subprocess.list2cmdline(run_command)) - subprocess.check_output(run_command) - # Analyze all time series - data=analysis.analyze_time_series(path_to_datafolder=time_series_path, - filename_extension="_time_series.csv") - data_path=pmb.get_resource(path="testsuite/weak_polyelectrolyte_dialysis_test_data") - -# Get reference test data -ref_data=pd.read_csv(f"{data_path}/data.csv", header=[0, 1]) -# Check charge -test_charge=np.sort(data["mean","alpha"].to_numpy()) -ref_charge=np.sort(ref_data["mean","alpha"].to_numpy()) -np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) -print("*** Test was successful ***") + + +def kernel(): + with tempfile.TemporaryDirectory() as time_series_path: + for pH in test_pH_values: + print(f"pH = {pH}") + run_command=[sys.executable, script_path, "--c_salt_res", str(c_salt_res), + "--c_mon_sys", str(c_mon_sys), "--pKa_value", str(pKa_value), + "--pH_res", str(pH), "--mode", "test", "--output", + time_series_path, "--no_verbose"] + print(subprocess.list2cmdline(run_command)) + subprocess.check_output(run_command) + # Analyze all time series + data=analysis.analyze_time_series(path_to_datafolder=time_series_path, + filename_extension="_time_series.csv") + return data + + +class Test(ut.TestCase): + + def test_polyelectrolyte_dialysis(self): + """ + Test weak polyelectrolyte dialysis with G-RxMC (interacting). 
+ """ + rtol=0.1 # relative tolerance + atol=0.05 # absolute tolerance + data = kernel() + # Get reference test data + ref_data=pd.read_csv(data_root / "data.csv", header=[0, 1]) + # Check charge + test_charge=np.sort(data["mean","alpha"].to_numpy()) + ref_charge=np.sort(ref_data["mean","alpha"].to_numpy()) + np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol) + + +if __name__ == "__main__": + ut.main()