Skip to content

Commit

Permalink
Merge pull request #780 from haddocking/fix_prot_segid
Browse files Browse the repository at this point in the history
Fix prot segid
  • Loading branch information
mgiulini committed Jan 18, 2024
2 parents 4dad36f + a1a5011 commit 1482d85
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 46 deletions.
8 changes: 0 additions & 8 deletions examples/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,6 @@ def main(examples, break_on_errors=True):
stderr=sys.stderr,
)

subprocess.run(
"haddock3 docking-protein-protein-test-start-from-cp.cfg --extend-run run2", # noqa: E501
shell=True,
check=break_on_errors,
stdout=sys.stdout,
stderr=sys.stderr,
)

# test exit with extend-run
rmtree("run2", ignore_errors=True)
subprocess.run(
Expand Down
14 changes: 0 additions & 14 deletions src/haddock/gear/prepare_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,22 +1043,8 @@ def populate_topology_molecule_params(topoaa: ParamMap) -> None:
"""Populate topoaa `molX` subdictionaries."""
topoaa_dft = _read_defaults("topoaa.1")

# list of possible prot_segids
uppers = list(string.ascii_uppercase)[::-1]

# removes from the list those prot_segids that are already defined
for param in topoaa:
if param.startswith("mol") and param[3:].isdigit():
with suppress(KeyError):
uppers.remove(topoaa[param]["prot_segid"])

# populates the prot_segids just for those that were not defined
# in the user configuration file. Other parameters are populated as
# well. `prot_segid` is the only one differing per molecule.
for i in range(1, len(topoaa["molecules"]) + 1):
mol = f"mol{i}"
if not (mol in topoaa and "prot_segid" in topoaa[mol]):
topoaa_dft["mol1"]["prot_segid"] = uppers.pop()

topoaa[mol] = recursive_dict_update(
topoaa_dft["mol1"],
Expand Down
6 changes: 6 additions & 0 deletions src/haddock/libs/libcns.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,12 @@ def prepare_cns_input(
libpdb.identify_chainseg(pdb.rel_path, sort=False)

chainsegs = sorted(list(set(segids) | set(chains)))
# check if any of chainsegs is already in chainid_list
if not identifier.endswith("scoring"):
if any(chainseg in chainid_list for chainseg in chainsegs):
raise ValueError(
f"Chain/seg IDs are not unique for pdbs {input_element}."
)
chainid_list.extend(chainsegs)

for i, _chainseg in enumerate(chainid_list, start=1):
Expand Down
5 changes: 5 additions & 0 deletions src/haddock/libs/libpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ def identify_chainseg(pdb_file_path: FilePath,
segids.append(segid)
if chainid:
chains.append(chainid)

if not segid and not chainid:
raise ValueError(
f"Could not identify chainID or segID in pdb {pdb_file_path}, line {line}"
)

if sort:
segids = sorted(list(set(segids)))
Expand Down
10 changes: 0 additions & 10 deletions src/haddock/modules/topology/topoaa/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,6 @@ tolerance:
group: module
explevel: expert
mol1:
prot_segid:
default: A
type: string
minchars: 0
maxchars: 4
title: Segment ID
short: Segment ID assigned to this molecule
long: Segment ID assigned to this molecule in CNS. Used to distinguish different molecules
group: molecule
explevel: easy
cyclicpept:
default: false
type: boolean
Expand Down
17 changes: 3 additions & 14 deletions tests/test_gear_prepare_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ def test_populate_topoaa_molecules():
}
populate_topology_molecule_params(topoaa)
assert "mol2" in topoaa
assert topoaa["mol2"]["prot_segid"] == "B"
assert topoaa["mol1"]["prot_segid"] == "A"
assert topoaa["mol2"]["cyclicpept"] is False
assert topoaa["mol1"]["cyclicpept"] is True
assert isnan(topoaa["mol2"]["hisd_1"])
Expand All @@ -74,12 +72,10 @@ def test_populate_topoaa_molecules_2():
"""Test mols are polated."""
topoaa = {
"molecules": ["file1.pdb", "file2.pdb"],
"mol2": {"cyclicpept": True, "prot_segid": "D"},
"mol2": {"cyclicpept": True},
}
populate_topology_molecule_params(topoaa)
assert "mol1" in topoaa
assert topoaa["mol1"]["prot_segid"] == "A"
assert topoaa["mol2"]["prot_segid"] == "D"

assert topoaa["mol1"]["cyclicpept"] is False
assert topoaa["mol2"]["cyclicpept"] is True
Expand All @@ -99,27 +95,20 @@ def test_populate_topoaa_molecules_3():
"""Test mols are polated."""
topoaa = {
"molecules": ["file1.pdb", "file2.pdb", "file3.pdb"],
"mol2": {"cyclicpept": True, "prot_segid": "C"},
"mol2": {"cyclicpept": True},
}
populate_topology_molecule_params(topoaa)
assert "mol1" in topoaa
assert topoaa["mol1"]["prot_segid"] == "A"
assert topoaa["mol2"]["prot_segid"] == "C"
assert topoaa["mol3"]["prot_segid"] == "B"


def test_populate_topoaa_molecules_4():
"""Test mols are polated with prot_segid sequence."""
topoaa = {
"molecules": ["file1.pdb", "file2.pdb", "file3.pdb", "file4.pdb"],
"mol3": {"cyclicpept": True, "prot_segid": "A"},
"mol3": {"cyclicpept": True},
}
populate_topology_molecule_params(topoaa)
assert "mol1" in topoaa
assert topoaa["mol1"]["prot_segid"] == "B"
assert topoaa["mol2"]["prot_segid"] == "C"
assert topoaa["mol3"]["prot_segid"] == "A"
assert topoaa["mol4"]["prot_segid"] == "D"


def test_populate_mol_params():
Expand Down

0 comments on commit 1482d85

Please sign in to comment.