biotite-dev · padix-key · Jul 17, 2024 · Apr 25, 2023 · Apr 25, 2023 · Apr 25, 2023
diff --git a/doc/cli.rst b/doc/cli.rst
@@ -38,8 +38,8 @@ used is specified with ``--model``/``-m``. By default, the first model is used.
 
 The addition of hydrogen atoms requires complete information about the
 bonds between atoms.
-Currently, this information can only be read from *MMTF*, *MOL* and *SDF*
-files.
+Currently, this information can only be read from *PDB*, *MMTF*, *MOL* and
+*SDF* files.
 If bond information is absent, *Hydride* automatically connects
 atoms based on the molecule/residue name and the atom names.
 However, the automatic bond detection only works for molecules in the

diff --git a/pyproject.toml b/pyproject.toml
@@ -44,7 +44,7 @@ hydride = "hydride.cli:main"
 requires = [
     "setuptools >= 0.30",
     "wheel >= 0.30",
-    "biotite >= 0.35",
+    "biotite >= 0.37",
     "oldest-supported-numpy",
     "msgpack >= 0.5.6",
     "cython >= 0.29"

diff --git a/src/hydride/cli.py b/src/hydride/cli.py
@@ -276,9 +276,19 @@ def read_structure(path, format, model_number):
                 f"for the input structure with {model_count} models"
             )
         model = pdb.get_structure(
-            pdb_file, model=model_number, extra_fields=["charge"]
+            pdb_file, model=model_number, extra_fields=["charge"],
+            include_bonds=True
         )
-        model.bonds = struc.connect_via_residue_names(model)
+        # Expect that all ANY bonds are actually SINGLE bonds
+        bond_array = model.bonds.as_array()
+        unknown_order_mask = (bond_array[:,2] == struc.BondType.ANY)
+        if unknown_order_mask.any():
+            warnings.warn(
+                "For some bonds the bond order is unknown, "
+                "hence single bonds are assumed"
+            )
+            bond_array[unknown_order_mask, 2] = struc.BondType.SINGLE
+            model.bonds = struc.BondList(model.array_length(), bond_array)
     elif format == "pdbx" or format == "cif":
         pdbx_file = pdbx.PDBxFile.read(path)
         model_count = pdbx.get_model_count(pdbx_file)

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -110,6 +110,47 @@ def test_simple(input_file, output_file):
     assert_hydrogen_addition(output_file)
 
 
+def test_pdbfile_missing_bond_order():
+    """
+    Test CLI run with PDB as input where some bond orders are missing.
+    """
+    mmtf_file = mmtf.MMTFFile.read(join(data_dir(), f"{PDB_ID}.mmtf"))
+    model = mmtf.get_structure(
+        mmtf_file, model=1, include_bonds=True, extra_fields=["charge"]
+    )
+    model = model[model.element != "H"]
+    # Introduce a residue ID gap between consecutive residues, so that
+    # inter-residue bonds need to be read from PDB (-> ANY bonds) and
+    # cannot be determined using 'connect_via_residue_names()'
+    model.res_id *= 2
+
+    input_file = tempfile.NamedTemporaryFile(
+        "w", suffix=".pdb", delete=False
+    )
+    input_file.close()
+    output_file = tempfile.NamedTemporaryFile(
+        "r", suffix=".pdb", delete=False
+    )
+    output_file.close()
+    # Remove the temporary files even if the run or an assertion fails
+    try:
+        strucio.save_structure(input_file.name, model)
+        with pytest.warns(
+            UserWarning, match="For some bonds the bond order is unknown"
+        ):
+            run_cli([
+                "-v",
+                "-i", input_file.name,
+                "-o", output_file.name,
+                "-c", str(PH)
+            ])
+        assert_hydrogen_addition(output_file.name)
+    finally:
+        os.remove(input_file.name)
+        os.remove(output_file.name)
+
+
 def test_molfile():
     """
     Test usage of MOL/SDF files for input and output.