Use Python multiprocessing

pyMBE-dev · Aug 29, 2024 · c48c0f6 · c48c0f6
1 parent c0a5d1b
commit c48c0f6
Show file tree

Hide file tree

Showing 6 changed files with 178 additions and 212 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -5,5 +5,7 @@ pint-pandas>=0.3
 biopandas==0.5.1.dev0
 scipy>=1.8.0
 matplotlib>=3.5.1
+# soft dependencies to run the samples
 tqdm>=4.57.0
-cmake>=3.22.1
+# soft dependencies to run the testsuite
+cmake>=3.22.1 # for CTest
diff --git a/testsuite/CTestTestfile.cmake b/testsuite/CTestTestfile.cmake
@@ -25,19 +25,27 @@ cmake_path(GET CMAKE_CURRENT_SOURCE_FILE PARENT_PATH CMAKE_CURRENT_SOURCE_DIR)
 cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH CMAKE_SOURCE_DIR)
 
 function(pymbe_add_test)
-  cmake_parse_arguments(TEST "" "PATH;NAME" "LABELS;ARGUMENTS" ${ARGN})
-  if(NOT DEFINED TEST_NAME)
-    cmake_path(GET TEST_PATH STEM TEST_NAME)
-    set(TEST_NAME ${TEST_NAME} PARENT_SCOPE)
-  endif()
+  cmake_parse_arguments(TEST "" "PATH;THREADS" "LABELS" ${ARGN})
+  cmake_path(GET TEST_PATH STEM TEST_NAME)
   if(DEFINED ENV{COVERAGE} AND "$ENV{COVERAGE}" STREQUAL "1")
     list(APPEND PYTHON_ARGUMENTS "-m" "coverage" "run" "--parallel-mode" "--source=${CMAKE_SOURCE_DIR}")
   endif()
-  add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}" ${TEST_ARGUMENTS})
+  add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}")
   set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 5)
   set_tests_properties(${TEST_NAME} PROPERTIES LABELS ${TEST_LABELS})
+  if(DEFINED TEST_THREADS)
+    set_tests_properties(${TEST_NAME} PROPERTIES PROCESSORS ${TEST_THREADS})
+  endif()
 endfunction()
 
+# functional tests, e.g. long simulations and ensemble averages
+pymbe_add_test(PATH globular_protein_tests.py LABELS long beyer2024 THREADS 2)
+pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2)
+pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024)
+pymbe_add_test(PATH cph_ideal_tests.py LABELS long)
+pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long)
+pymbe_add_test(PATH gcmc_tests.py LABELS long)
+
 # unit tests
 pymbe_add_test(PATH serialization_test.py)
 pymbe_add_test(PATH lj_tests.py)
@@ -58,11 +66,3 @@ pymbe_add_test(PATH charge_number_map_tests.py)
 pymbe_add_test(PATH generate_coordinates_tests.py)
 pymbe_add_test(PATH reaction_methods_unit_tests.py)
 pymbe_add_test(PATH determine_reservoir_concentrations_unit_test.py)
-
-# functional tests, e.g. long simulations and ensemble averages
-pymbe_add_test(PATH globular_protein_tests.py LABELS long)
-pymbe_add_test(PATH peptide_tests.py LABELS long)
-pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long)
-pymbe_add_test(PATH cph_ideal_tests.py LABELS long)
-pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long)
-pymbe_add_test(PATH gcmc_tests.py LABELS long)
diff --git a/testsuite/globular_protein_tests.py b/testsuite/globular_protein_tests.py
@@ -18,77 +18,66 @@
 
 from lib import analysis
 import sys
+import pathlib
 import tempfile
 import subprocess
+import multiprocessing
 import numpy as np
 import pandas as pd
 import unittest as ut
-import pathlib
 
 
-class Test(ut.TestCase):
-    data_root = pathlib.Path(__file__).parent.resolve() / "globular_protein_tests_data"
-    samples_root = pathlib.Path(__file__).parent.parent.resolve() / "samples"
+root = pathlib.Path(__file__).parent.parent.resolve()
+data_root = root / "testsuite" / "globular_protein_tests_data"
+script_path = root / "samples" / "Beyer2024" / "globular_protein.py"
+test_pH_values = [2, 5, 7]
+tasks = ["1beb", "1f6s"]
+mode = "test"
 
-    def run_protein_test(self, script_path, test_pH_values, protein_pdb, rtol, atol,mode="test"):
-        """
-        Runs a set of tests for a given protein pdb.
 
-        Args:
-            script_path(`str`): Path to the script to run the test.
-            test_pH_values(`lst`): List of pH values to be tested.
-            protein_pdb(`str`): PDB code of the protein.
-        """
-        valid_modes=["test","save"]
-        assert mode in valid_modes, f"Mode {mode} not supported, valid modes: {valid_modes}"
+def kernel(protein_pdb):
+    """
+    Runs a set of tests for a given protein pdb.
 
-        print(f"Running tests for {protein_pdb}")
-        with tempfile.TemporaryDirectory() as time_series_path:
-            for pH in test_pH_values:
-                print(f"pH = {pH}")
-                run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH),
-                             "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf",
-                             "--mode", "test", "--no_verbose", "--output", time_series_path]
-                print(subprocess.list2cmdline(run_command))
-                subprocess.check_output(run_command)
-            # Analyze all time series
-            data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
-                                              filename_extension="_time_series.csv")
+    Args:
+        protein_pdb(`str`): PDB code of the protein.
+    """
+    with tempfile.TemporaryDirectory() as time_series_path:
+        for pH in test_pH_values:
+            print(f"pH = {pH}")
+            run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH),
+                         "--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf",
+                         "--mode", "test", "--no_verbose", "--output", time_series_path]
+            print(subprocess.list2cmdline(run_command))
+            subprocess.check_output(run_command)
+        # Analyze all time series
+        data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
+                                          filename_extension="_time_series.csv")
+    return (protein_pdb, data)
 
-        if mode == "test":
-            # Get reference test data
-            ref_data=pd.read_csv(self.data_root / f"{protein_pdb}.csv", header=[0, 1])
-            # Check charge
-            test_charge=np.sort(data["mean","charge"].to_numpy())
-            ref_charge=np.sort(ref_data["mean","charge"].to_numpy())
-            np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol)
-            print(f"Test for {protein_pdb} was successful")
-        else:
-            assert mode == "save"
-            # Save data for future testing
-            data.to_csv(self.data_root / f"{protein_pdb}.csv", index=False)
+
+class Test(ut.TestCase):
 
     def test_globular_protein(self):
-        script_path = self.samples_root / "Beyer2024" / "globular_protein.py"
-        test_pH_values=[2,5,7]
+        with multiprocessing.Pool(processes=2) as pool:
+            results = dict(pool.map(kernel, tasks, chunksize=1))
+
         rtol=0.1 # relative tolerance
         atol=0.5 # absolute tolerance
-
-        # Run test for 1BEB case
-        protein_pdb = "1beb"
-        self.run_protein_test(script_path=script_path,
-                              test_pH_values=test_pH_values,
-                              protein_pdb=protein_pdb,
-                              rtol=rtol,
-                              atol=atol)
-
-        # Run test for 1F6S case
-        protein_pdb = "1f6s"
-        self.run_protein_test(script_path=script_path,
-                              test_pH_values=test_pH_values,
-                              protein_pdb=protein_pdb,
-                              rtol=rtol,
-                              atol=atol)
+        for protein_pdb, data in results.items():
+            # Save data for future testing
+            if mode == "save":
+                data.to_csv(data_root / f"{protein_pdb}.csv", index=False)
+                continue
+            assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']"
+            with self.subTest(msg=f"Protein {protein_pdb}"):
+                # Get reference test data
+                ref_data=pd.read_csv(data_root / f"{protein_pdb}.csv", header=[0, 1])
+                # Check charge
+                test_charge=np.sort(data["mean","charge"].to_numpy())
+                ref_charge=np.sort(ref_data["mean","charge"].to_numpy())
+                np.testing.assert_allclose(
+                    test_charge, ref_charge, rtol=rtol, atol=atol)
 
 if __name__ == "__main__":
     ut.main()
diff --git a/testsuite/henderson_hasselbalch_tests.py b/testsuite/henderson_hasselbalch_tests.py
@@ -50,63 +50,38 @@ def test(self):
                 sequence = sequence2,
                 model = model)
 
+        with self.subTest(msg="Check Henderson-Hasselbalch equation"):
+            pH_range = np.linspace(2, 12, num=200)
+            Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
+                                      pH_list = pH_range)
+            Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
+                                      pH_list = pH_range)
+
+            data_path = pmb.get_resource(path=self.data_root)
+            ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",")
+            np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:])
+            np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:])
+
+        with self.subTest(msg="Check Henderson-Hasselbalch equation + Donnan"):
+            HH_Donnan_dict = pmb.calculate_HH_Donnan(
+                    c_macro = {"peptide_1": pep1_concentration,
+                               "peptide_2": pep2_concentration},
+                    c_salt = c_salt,
+                    pH_list = pH_range)
+
+            ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",")
+            np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:])
+            np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:])
+
+        with self.subTest(msg="Check HH and HH_Don are consistent"):
+            Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
+                                      pH_list = HH_Donnan_dict["pH_system_list"])
+            Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
+                                      pH_list = HH_Donnan_dict["pH_system_list"])
+
+            np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"])
+            np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"])
 
-        print("*** Check that Henderson-Hasselbalch equation works correctly ***")
-
-        # Calculate charge according to Henderson-Hasselbalch equation
-        pH_range = np.linspace(2, 12, num=200)
-        Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
-                                pH_list = pH_range)
-        Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
-                                pH_list = pH_range)
-
-        """
-        with open(self.data_root / "HH.csv", "wb") as f:
-            np.savetxt(f, np.asarray(Z_HH_1).reshape(1,-1), delimiter=",")
-            np.savetxt(f, np.asarray(Z_HH_2).reshape(1,-1), delimiter=",")
-        """
-
-        data_path = pmb.get_resource(path=self.data_root)
-        ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",")
-        np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:])
-        np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:])
-
-        print("*** Test passed ***\n")
-
-
-        print("*** Check that Henderson-Hasselbalch equation + Donnan works correctly ***")
-
-        HH_Donnan_dict = pmb.calculate_HH_Donnan(
-                c_macro = {"peptide_1": pep1_concentration,
-                           "peptide_2": pep2_concentration},
-                c_salt = c_salt,
-                pH_list = pH_range)
-
-        """
-        with open(self.data_root / "HH_Donnan.csv", "wb") as f:
-            np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_1"]).reshape(1,-1), delimiter=",")
-            np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_2"]).reshape(1,-1), delimiter=",")
-        """
-
-        ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",")
-        np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:])
-        np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:])
-
-        print("*** Test passed ***\n")
-
-
-        print("*** Check that HH and HH_Don are consistent ***")
-
-        Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
-                                pH_list = HH_Donnan_dict["pH_system_list"])
-        Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
-                                pH_list = HH_Donnan_dict["pH_system_list"])
-
-        np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"])
-        np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"])
-
-
-        print("*** Test passed***")
 
 if __name__ == "__main__":
     ut.main()