Skip to content

Commit

Permalink
Use Python multiprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
jngrad committed Aug 29, 2024
1 parent c0a5d1b commit c48c0f6
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 212 deletions.
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@ pint-pandas>=0.3
biopandas==0.5.1.dev0
scipy>=1.8.0
matplotlib>=3.5.1
# soft dependencies to run the samples
tqdm>=4.57.0
cmake>=3.22.1
# soft dependencies to run the testsuite
cmake>=3.22.1 # for CTest
28 changes: 14 additions & 14 deletions testsuite/CTestTestfile.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,27 @@ cmake_path(GET CMAKE_CURRENT_SOURCE_FILE PARENT_PATH CMAKE_CURRENT_SOURCE_DIR)
cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH CMAKE_SOURCE_DIR)

function(pymbe_add_test)
cmake_parse_arguments(TEST "" "PATH;NAME" "LABELS;ARGUMENTS" ${ARGN})
if(NOT DEFINED TEST_NAME)
cmake_path(GET TEST_PATH STEM TEST_NAME)
set(TEST_NAME ${TEST_NAME} PARENT_SCOPE)
endif()
cmake_parse_arguments(TEST "" "PATH;THREADS" "LABELS" ${ARGN})
cmake_path(GET TEST_PATH STEM TEST_NAME)
if(DEFINED ENV{COVERAGE} AND "$ENV{COVERAGE}" STREQUAL "1")
list(APPEND PYTHON_ARGUMENTS "-m" "coverage" "run" "--parallel-mode" "--source=${CMAKE_SOURCE_DIR}")
endif()
add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}" ${TEST_ARGUMENTS})
add_test(${TEST_NAME} "${Python_EXECUTABLE}" ${PYTHON_ARGUMENTS} "${TEST_PATH}")
set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 5)
set_tests_properties(${TEST_NAME} PROPERTIES LABELS ${TEST_LABELS})
if(DEFINED TEST_THREADS)
set_tests_properties(${TEST_NAME} PROPERTIES PROCESSORS ${TEST_THREADS})
endif()
endfunction()

# functional tests, e.g. long simulations and ensemble averages
pymbe_add_test(PATH globular_protein_tests.py LABELS long beyer2024 THREADS 2)
pymbe_add_test(PATH peptide_tests.py LABELS long beyer2024 THREADS 2)
pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long beyer2024)
pymbe_add_test(PATH cph_ideal_tests.py LABELS long)
pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long)
pymbe_add_test(PATH gcmc_tests.py LABELS long)

# unit tests
pymbe_add_test(PATH serialization_test.py)
pymbe_add_test(PATH lj_tests.py)
Expand All @@ -58,11 +66,3 @@ pymbe_add_test(PATH charge_number_map_tests.py)
pymbe_add_test(PATH generate_coordinates_tests.py)
pymbe_add_test(PATH reaction_methods_unit_tests.py)
pymbe_add_test(PATH determine_reservoir_concentrations_unit_test.py)

# functional tests, e.g. long simulations and ensemble averages
pymbe_add_test(PATH globular_protein_tests.py LABELS long)
pymbe_add_test(PATH peptide_tests.py LABELS long)
pymbe_add_test(PATH weak_polyelectrolyte_dialysis_test.py LABELS long)
pymbe_add_test(PATH cph_ideal_tests.py LABELS long)
pymbe_add_test(PATH grxmc_ideal_tests.py LABELS long)
pymbe_add_test(PATH gcmc_tests.py LABELS long)
101 changes: 45 additions & 56 deletions testsuite/globular_protein_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,77 +18,66 @@

from lib import analysis
import sys
import pathlib
import tempfile
import subprocess
import multiprocessing
import numpy as np
import pandas as pd
import unittest as ut
import pathlib


class Test(ut.TestCase):
data_root = pathlib.Path(__file__).parent.resolve() / "globular_protein_tests_data"
samples_root = pathlib.Path(__file__).parent.parent.resolve() / "samples"
root = pathlib.Path(__file__).parent.parent.resolve()
data_root = root / "testsuite" / "globular_protein_tests_data"
script_path = root / "samples" / "Beyer2024" / "globular_protein.py"
test_pH_values = [2, 5, 7]
tasks = ["1beb", "1f6s"]
mode = "test"

def run_protein_test(self, script_path, test_pH_values, protein_pdb, rtol, atol,mode="test"):
"""
Runs a set of tests for a given protein pdb.

Args:
script_path(`str`): Path to the script to run the test.
test_pH_values(`lst`): List of pH values to be tested.
protein_pdb(`str`): PDB code of the protein.
"""
valid_modes=["test","save"]
assert mode in valid_modes, f"Mode {mode} not supported, valid modes: {valid_modes}"
def kernel(protein_pdb):
"""
Runs a set of tests for a given protein pdb.
print(f"Running tests for {protein_pdb}")
with tempfile.TemporaryDirectory() as time_series_path:
for pH in test_pH_values:
print(f"pH = {pH}")
run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH),
"--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf",
"--mode", "test", "--no_verbose", "--output", time_series_path]
print(subprocess.list2cmdline(run_command))
subprocess.check_output(run_command)
# Analyze all time series
data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
filename_extension="_time_series.csv")
Args:
protein_pdb(`str`): PDB code of the protein.
"""
with tempfile.TemporaryDirectory() as time_series_path:
for pH in test_pH_values:
print(f"pH = {pH}")
run_command=[sys.executable, script_path, "--pdb", protein_pdb, "--pH", str(pH),
"--path_to_cg", f"parameters/globular_proteins/{protein_pdb}.vtf",
"--mode", "test", "--no_verbose", "--output", time_series_path]
print(subprocess.list2cmdline(run_command))
subprocess.check_output(run_command)
# Analyze all time series
data=analysis.analyze_time_series(path_to_datafolder=time_series_path,
filename_extension="_time_series.csv")
return (protein_pdb, data)

if mode == "test":
# Get reference test data
ref_data=pd.read_csv(self.data_root / f"{protein_pdb}.csv", header=[0, 1])
# Check charge
test_charge=np.sort(data["mean","charge"].to_numpy())
ref_charge=np.sort(ref_data["mean","charge"].to_numpy())
np.testing.assert_allclose(test_charge, ref_charge, rtol=rtol, atol=atol)
print(f"Test for {protein_pdb} was successful")
else:
assert mode == "save"
# Save data for future testing
data.to_csv(self.data_root / f"{protein_pdb}.csv", index=False)

class Test(ut.TestCase):

def test_globular_protein(self):
script_path = self.samples_root / "Beyer2024" / "globular_protein.py"
test_pH_values=[2,5,7]
with multiprocessing.Pool(processes=2) as pool:
results = dict(pool.map(kernel, tasks, chunksize=1))

rtol=0.1 # relative tolerance
atol=0.5 # absolute tolerance

# Run test for 1BEB case
protein_pdb = "1beb"
self.run_protein_test(script_path=script_path,
test_pH_values=test_pH_values,
protein_pdb=protein_pdb,
rtol=rtol,
atol=atol)

# Run test for 1F6S case
protein_pdb = "1f6s"
self.run_protein_test(script_path=script_path,
test_pH_values=test_pH_values,
protein_pdb=protein_pdb,
rtol=rtol,
atol=atol)
for protein_pdb, data in results.items():
# Save data for future testing
if mode == "save":
data.to_csv(data_root / f"{protein_pdb}.csv", index=False)
continue
assert mode == "test", f"Mode {mode} not supported, valid modes: ['save', 'test']"
with self.subTest(msg=f"Protein {protein_pdb}"):
# Get reference test data
ref_data=pd.read_csv(data_root / f"{protein_pdb}.csv", header=[0, 1])
# Check charge
test_charge=np.sort(data["mean","charge"].to_numpy())
ref_charge=np.sort(ref_data["mean","charge"].to_numpy())
np.testing.assert_allclose(
test_charge, ref_charge, rtol=rtol, atol=atol)

if __name__ == "__main__":
ut.main()
87 changes: 31 additions & 56 deletions testsuite/henderson_hasselbalch_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,63 +50,38 @@ def test(self):
sequence = sequence2,
model = model)

with self.subTest(msg="Check Henderson-Hasselbalch equation"):
pH_range = np.linspace(2, 12, num=200)
Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
pH_list = pH_range)
Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
pH_list = pH_range)

data_path = pmb.get_resource(path=self.data_root)
ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",")
np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:])
np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:])

with self.subTest(msg="Check Henderson-Hasselbalch equation + Donnan"):
HH_Donnan_dict = pmb.calculate_HH_Donnan(
c_macro = {"peptide_1": pep1_concentration,
"peptide_2": pep2_concentration},
c_salt = c_salt,
pH_list = pH_range)

ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",")
np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:])
np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:])

with self.subTest(msg="Check HH and HH_Don are consistent"):
Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
pH_list = HH_Donnan_dict["pH_system_list"])
Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
pH_list = HH_Donnan_dict["pH_system_list"])

np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"])
np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"])

print("*** Check that Henderson-Hasselbalch equation works correctly ***")

# Calculate charge according to Henderson-Hasselbalch equation
pH_range = np.linspace(2, 12, num=200)
Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
pH_list = pH_range)
Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
pH_list = pH_range)

"""
with open(self.data_root / "HH.csv", "wb") as f:
np.savetxt(f, np.asarray(Z_HH_1).reshape(1,-1), delimiter=",")
np.savetxt(f, np.asarray(Z_HH_2).reshape(1,-1), delimiter=",")
"""

data_path = pmb.get_resource(path=self.data_root)
ref_data_HH = np.loadtxt(f"{data_path}/HH.csv", delimiter=",")
np.testing.assert_allclose(Z_HH_1, ref_data_HH[0,:])
np.testing.assert_allclose(Z_HH_2, ref_data_HH[1,:])

print("*** Test passed ***\n")


print("*** Check that Henderson-Hasselbalch equation + Donnan works correctly ***")

HH_Donnan_dict = pmb.calculate_HH_Donnan(
c_macro = {"peptide_1": pep1_concentration,
"peptide_2": pep2_concentration},
c_salt = c_salt,
pH_list = pH_range)

"""
with open(self.data_root / "HH_Donnan.csv", "wb") as f:
np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_1"]).reshape(1,-1), delimiter=",")
np.savetxt(f, np.asarray(HH_Donnan_dict["charges_dict"]["peptide_2"]).reshape(1,-1), delimiter=",")
"""

ref_data_HH_Donnan = np.loadtxt(f"{data_path}/HH_Donnan.csv", delimiter=",")
np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_1"], ref_data_HH_Donnan[0,:])
np.testing.assert_allclose(HH_Donnan_dict["charges_dict"]["peptide_2"], ref_data_HH_Donnan[1,:])

print("*** Test passed ***\n")


print("*** Check that HH and HH_Don are consistent ***")

Z_HH_1 = pmb.calculate_HH(molecule_name = "peptide_1",
pH_list = HH_Donnan_dict["pH_system_list"])
Z_HH_2 = pmb.calculate_HH(molecule_name = "peptide_2",
pH_list = HH_Donnan_dict["pH_system_list"])

np.testing.assert_allclose(Z_HH_1, HH_Donnan_dict["charges_dict"]["peptide_1"])
np.testing.assert_allclose(Z_HH_2, HH_Donnan_dict["charges_dict"]["peptide_2"])


print("*** Test passed***")

if __name__ == "__main__":
ut.main()
Loading

0 comments on commit c48c0f6

Please sign in to comment.