In process

uibcdf · May 2, 2024 · 7bcf8a2 · 7bcf8a2
1 parent 21cb8d2
commit 7bcf8a2
Show file tree

Hide file tree

Showing 37 changed files with 2,089 additions and 500 deletions.
diff --git a/molsysmt/attribute/bonds_are_required_to_get_attribute.py b/molsysmt/attribute/bonds_are_required_to_get_attribute.py
@@ -15,6 +15,7 @@
     'entity_id', 
     'entity_type', 
     'entity_name', 
+    'chain_type', 
     'n_entities', 
     'bond_index', 
     'bond_type', 

diff --git a/molsysmt/basic/compare.py b/molsysmt/basic/compare.py
@@ -196,12 +196,14 @@ def compare(molecular_system, molecular_system_2, selection='all', structure_ind
                 if key in atts_to_be_compared:
                     atts_to_be_compared.remove(key)
 
-
     atts_of_A = get_attributes(molecular_system, output_type='list')
     atts_of_B = get_attributes(molecular_system_2, output_type='list')
 
     atts_required = set(atts_to_be_compared) & set(atts_of_A) & set(atts_of_B)
 
+    molecular_system = _piped_molecular_system(molecular_system, atts_required)
+    molecular_system_2 = _piped_molecular_system(molecular_system_2, atts_required)
+
     ######   EQUAL   #####
 
     if rule == 'equal':

diff --git a/molsysmt/basic/get.py b/molsysmt/basic/get.py
@@ -176,6 +176,52 @@ def get(molecular_system,
         else:
             indices = select(molecular_system, element=element, selection=mask, syntax=syntax, skip_digestion=True)
 
+    aux_molecular_system = _piped_molecular_system(molecular_system, in_attributes)
+
+    output = []
+
+    for in_attribute in in_attributes:
+
+        if attributes_filter[in_attribute]:
+
+            dict_indices = {}
+            if element != 'system':
+                if attributes[in_attribute]['runs_on_elements']:
+                    dict_indices['indices'] = indices
+            if attributes[in_attribute]['runs_on_structures']:
+                dict_indices['structure_indices'] = structure_indices
+
+            aux_item, aux_form = where_is_attribute(aux_molecular_system, in_attribute, skip_digestion=True)
+
+            if aux_item is None:
+                result = None
+            else:
+                aux_get = getattr(_dict_modules[aux_form], f'get_{in_attribute}_from_{element}')
+                result = aux_get(aux_item, **dict_indices)
+
+        else:
+
+            result = None
+
+        output.append(result)
+
+    if output_type=='values':
+        if len(output) == 1:
+            return output[0]
+        else:
+            return output
+    elif output_type=='dictionary':
+        return dict(zip(in_attributes, output))
+
+
+def _piped_molecular_system(molecular_system, in_attributes):
+
+    from .. import select, where_is_attribute, get_form, convert
+    from molsysmt.form import _dict_modules
+    from molsysmt.attribute import attributes, bonds_are_required_to_get_attribute
+    from molsysmt.attribute import is_topological_attribute, is_structural_attribute
+
+
     piped_topological_attribute = {}
     piped_structural_attribute = {}
     piped_any_attribute = {}
@@ -260,38 +306,4 @@ def get(molecular_system,
                 aux_molecular_system = convert(molecular_system, to_form='molsysmt.MolSys',
                                                get_missing_bonds=bonds_required_by_attributes, skip_digestion=True)
 
-    output = []
-
-    for in_attribute in in_attributes:
-
-        if attributes_filter[in_attribute]:
-
-            dict_indices = {}
-            if element != 'system':
-                if attributes[in_attribute]['runs_on_elements']:
-                    dict_indices['indices'] = indices
-            if attributes[in_attribute]['runs_on_structures']:
-                dict_indices['structure_indices'] = structure_indices
-
-            aux_item, aux_form = where_is_attribute(aux_molecular_system, in_attribute, skip_digestion=True)
-
-            if aux_item is None:
-                result = None
-            else:
-                aux_get = getattr(_dict_modules[aux_form], f'get_{in_attribute}_from_{element}')
-                result = aux_get(aux_item, **dict_indices)
-
-        else:
-
-            result = None
-
-        output.append(result)
-
-    if output_type=='values':
-        if len(output) == 1:
-            return output[0]
-        else:
-            return output
-    elif output_type=='dictionary':
-        return dict(zip(in_attributes, output))
-
+    return aux_molecular_system
diff --git a/molsysmt/build/get_missing_bonds.py b/molsysmt/build/get_missing_bonds.py
@@ -25,7 +25,7 @@
 
 @digest()
 def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all',
-                      structure_indices=0, syntax='MolSysMT', engine='MolSysMT',
+                      structure_index=0, syntax='MolSysMT', engine='MolSysMT',
                       sorted=True, skip_digestion=False):
     """
     To be written soon...
@@ -70,20 +70,36 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'
 
                 case 'ion':
 
-                    aux_bonds = _bonds_in_ion(group_name, atom_names, atom_indices, sorted=False)
+                    try:
+                        aux_bonds = _bonds_in_ion(group_name, atom_names, atom_indices, sorted=False)
+                    except:
+                        aux_bonds = _bonds_in_unknown_group(molecular_system, atom_indices, atom_names,
+                                                            structure_index=structure_index, threshold=threshold,
+                                                            sorted=False)
                     bonds += aux_bonds
 
                 case 'amino acid':
 
-                    aux_bonds = _bonds_in_amino_acid(group_name, atom_names, atom_indices, sorted=False)
+                    try:
+                        aux_bonds = _bonds_in_amino_acid(group_name, atom_names, atom_indices, sorted=False)
+                    except:
+                        aux_bonds = _bonds_in_unknown_group(molecular_system, atom_indices, atom_names,
+                                                            structure_index=structure_index, threshold=threshold,
+                                                            sorted=False)
+
                     bonds += aux_bonds
 
                     aux_peptidic_bonds_C[group_index]=atom_indices[atom_names.index('C')]
                     aux_peptidic_bonds_N[group_index]=atom_indices[atom_names.index('N')]
 
                 case 'terminal capping':
 
-                    aux_bonds = _bonds_in_terminal_capping(group_name, atom_names, atom_indices, sorted=False)
+                    try:
+                        aux_bonds = _bonds_in_terminal_capping(group_name, atom_names, atom_indices, sorted=False)
+                    except:
+                        aux_bonds = _bonds_in_unknown_group(molecular_system, atom_indices, atom_names,
+                                                            structure_index=structure_index, threshold=threshold,
+                                                            sorted=False)
                     bonds += aux_bonds
 
                     if is_c_terminal_capping(group_name):
@@ -95,7 +111,12 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'
 
                 case 'small molecule':
 
-                    aux_bonds = _bonds_in_small_molecule(group_name, atom_names, atom_indices, sorted=False)
+                    try:
+                        aux_bonds = _bonds_in_small_molecule(group_name, atom_names, atom_indices, sorted=False)
+                    except:
+                        aux_bonds = _bonds_in_unknown_group(molecular_system, atom_indices, atom_names,
+                                                            structure_index=structure_index, threshold=threshold,
+                                                            sorted=False)
                     bonds += aux_bonds
 
                 case 'saccharide':
@@ -198,4 +219,33 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'
 
     return bonds
 
+def _bonds_in_unknown_group(molecular_system, atom_indices, atom_names, structure_index=0,
+                            threshold='2 angstroms', sorted=False):
+
+    from molsysmt.element.atom import get_atom_type_from_atom_name
+    from molsysmt.structure import get_contacts
+
+    heavy_atoms=[]
+    h_atoms=[]
+
+    for index, name in zip(atom_indices, atom_names):
+        if 'H'==get_atom_type_from_atom_name(name):
+            h_atoms.append(index)
+        else:
+            heavy_atoms.append(index)
+
+    heavy_bonds = get_contacts(molecular_system, selection=heavy_atoms,
+                               structure_indices = structure_index, threshold=threshold,
+                               output_type='pairs', output_indices='atom', pbc=True, skip_digestion=True)[0]
+
+    h_bonds = get_contacts(molecular_system, selection=heavy_atoms, selection_2=h_atoms,
+                           structure_indices = structure_index, threshold=threshold,
+                           output_type='pairs', output_indices='atom', pbc=True, skip_digestion=True)[0]
+
+    bonds = heavy_bonds + h_bonds
+
+    if sorted:
+        bonds = _sorted(bonds)
+
+    return bonds
 
diff --git a/molsysmt/element/group/get_group_type.py b/molsysmt/element/group/get_group_type.py
@@ -1,7 +1,7 @@
 from molsysmt._private.digestion import digest
 from .water import is_water
 from .ion import is_ion
-from .small_molecule import is_small_molecule
+from .small_molecule import is_small_molecule, small_molecule_is_amino_acid
 from .amino_acid import is_amino_acid
 from .terminal_capping import is_terminal_capping
 from .nucleotide import is_nucleotide
@@ -20,12 +20,16 @@ def get_group_type(molecular_system, element='atom', selection='all', redefine_t
 
         if element == 'atom':
 
-            group_names_from_atom = get(molecular_system, element='atom', selection=selection, syntax=syntax,
-                                        group_name=True)
+            group_names_from_atom = get(molecular_system, element='atom', selection=selection,
+                                        syntax=syntax, group_name=True)
             unique_group_names = np.unique(group_names_from_atom)
             aux_dict = {}
             for name in unique_group_names:
-                aux_dict[name] = get_group_type_from_group_name(name)
+                tmp_group_type = get_group_type_from_group_name(name)
+                if tmp_group_type == 'small molecule':
+                    if _small_molecule_is_amino_acid(molecular_system, name):
+                        tmp_group_type = 'amino acid'
+                aux_dict[name] = tmp_group_type
 
             output = [aux_dict[ii] for ii in group_names_from_atom]
 
@@ -36,6 +40,10 @@ def get_group_type(molecular_system, element='atom', selection='all', redefine_t
             unique_group_names = np.unique(group_names_from_group)
             aux_dict = {}
             for name in unique_group_names:
+                tmp_group_type = _get_group_type_from_group_name(name)
+                if tmp_group_type == 'small molecule':
+                    if _small_molecule_is_amino_acid(molecular_system, group_index):
+                        tmp_group_type = 'amino acid'
                 aux_dict[name] = _get_group_type_from_group_name(name)
 
             output = [aux_dict[ii] for ii in group_names_from_group]

diff --git a/molsysmt/element/group/small_molecule/__init__.py b/molsysmt/element/group/small_molecule/__init__.py
@@ -2,4 +2,4 @@
 from .group_names import group_names
 from .get_group_db import get_group_db
 from .get_bonded_atom_pairs import get_bonded_atom_pairs
-
+from .small_molecule_is_amino_acid import small_molecule_is_amino_acid
diff --git a/molsysmt/element/group/small_molecule/small_molecule_is_amino_acid.py b/molsysmt/element/group/small_molecule/small_molecule_is_amino_acid.py
@@ -0,0 +1,26 @@
+import numpy as np
+from .group_names import group_names
+
+def small_molecule_is_amino_acid(molecular_system, group_name):
+
+    output = False
+
+    if group_name in group_names:
+
+        from molsysmt import get
+
+        atom_names = get(molecular_system, element='atom', selection=f'group_name=="{group_name}"', atom_name=True)
+        unique_atom_names = np.unique(atom_names)
+
+        if 'CA' in unique_atom_names:
+            if 'CB' in unique_atom_names:
+                if 'C' in unique_atom_names:
+                    if 'O' in unique_atom_names:
+                        if 'N' in unique_atom_names:
+                            output = True
+                            warning_message = f"Warning! The group name {group_name} is reserved in the Protein Data "
+                            warning_message += "Bank for a small molecule, not for an amino acid."
+                            print(warning_message)
+
+    return output
+
diff --git a/molsysmt/form/file_gro/__init__.py b/molsysmt/form/file_gro/__init__.py
@@ -3,9 +3,9 @@
 form_info = ["Gromacs gro file format",
              "http://manual.gromacs.org/documentation/2018/user-guide/file-formats.html#gro"]
 
-piped_topological_attribute = None
-piped_structural_attribute = None
-piped_any_attribute = None
+piped_topological_attribute = 'molsysmt.MolSys'
+piped_structural_attribute = 'molsysmt.Structure'
+piped_any_attribute = 'molsysmt.MolSys'
 
 from .is_form import is_form
 
@@ -30,6 +30,7 @@
 from .to_molsysmt_MolSysOld import to_molsysmt_MolSysOld
 from .to_molsysmt_TopologyOld import to_molsysmt_TopologyOld
 from .to_molsysmt_StructuresOld import to_molsysmt_StructuresOld
+from .to_molsysmt_GROFileHandler import to_molsysmt_GROFileHandler
 from .to_openmm_Topology import to_openmm_Topology
 from .to_openmm_Modeller import to_openmm_Modeller
 from .to_openmm_GromacsGroFile import to_openmm_GromacsGroFile
@@ -46,6 +47,7 @@
         'molsysmt.MolSysOld': to_molsysmt_MolSysOld,
         'molsysmt.TopologyOld': to_molsysmt_TopologyOld,
         'molsysmt.StructuresOld': to_molsysmt_StructuresOld,
+        'molsysmt.GROFileHandler': to_molsysmt_GROFileHandler,
         'openmm.Topology': to_openmm_Topology,
         'openmm.Modeller': to_openmm_Modeller,
         'openmm.GromacsGroFile': to_openmm_GromacsGroFile,