Skip to content

Commit

Permalink
In process
Browse files Browse the repository at this point in the history
  • Loading branch information
dprada committed Mar 6, 2024
1 parent 1345101 commit 8d3aab8
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 67 deletions.
22 changes: 10 additions & 12 deletions molsysmt/build/get_missing_bonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'

form = get_form(molecular_system)

if form=='molsysmt.Topology':
if form in ['molsysmt.MolSys', 'molsysmt.Topology']:

raise ValueError('The molecular system needs to have coordinates')

if form=='molsysmt.MolSys':
if form == 'molsysmt.MolSys':
aux_item = molecular_system.topology
else:
aux_item = molecular_system

aux_output = [np.arange(molecular_system.topology.groups.shape[0]).tolist(),
molecular_system.topology.groups.group_name.tolist(),
molecular_system.topology.groups.group_type.tolist()]
aux_output = [np.arange(aux_item.groups.shape[0]).tolist(),
aux_item.groups.group_name.tolist(),
aux_item.groups.group_type.tolist()]

former_group_index = -1

Expand All @@ -50,7 +51,7 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'
aux_atom_names = []
aux_atom_types = []

for atom in molecular_system.topology.atoms.itertuples():
for atom in aux_item.atoms.itertuples():
if former_group_index != atom.group_index:
if former_group_index != -1:
atom_indices.append(aux_atom_indices)
Expand Down Expand Up @@ -94,10 +95,7 @@ def get_missing_bonds(molecular_system, threshold='2 angstroms', selection='all'

aux_bonds = _bonds_in_amino_acid(group_name, atom_names, atom_indices)

if aux_bonds is None:

print(f'Bonds by distances with {group_name}')
print('>',atom_names)
if aux_bonds is None and with_distances:

aux_bonds_unk_atoms = []

Expand Down
1 change: 1 addition & 0 deletions molsysmt/form/file_gro/to_molsysmt_Topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
def to_molsysmt_Topology(item, atom_indices='all', get_missing_bonds=False, skip_digestion=False):

from molsysmt.native import Topology
from molsysmt.build import get_missing_bonds as _get_missing_bonds

with open(item,'r') as fff:

Expand Down
152 changes: 146 additions & 6 deletions molsysmt/form/molsysmt_PDBFileHandler/to_molsysmt_MolSys.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,156 @@
from molsysmt._private.digestion import digest
from molsysmt import pyunitwizard as puw
import numpy as np

@digest(form='molsysmt.PDBFileHandler')
def to_molsysmt_MolSys(item, atom_indices='all', structure_indices='all', skip_digestion=False):
def to_molsysmt_MolSys(item, atom_indices='all', structure_indices='all', get_missing_bonds=False,
skip_digestion=False):

from .to_molsysmt_Topology import to_molsysmt_Topology
from .to_molsysmt_Structures import to_molsysmt_Structures
from molsysmt.native import MolSys
from molsysmt.build import get_missing_bonds as _get_missing_bonds
from molsysmt.pbc import get_box_from_lengths_and_angles

tmp_item = MolSys()
tmp_item.topology = to_molsysmt_Topology(item, atom_indices=atom_indices, skip_digestion=True)
tmp_item.structures = to_molsysmt_Structures(item, atom_indices=atom_indices, structure_indices=structure_indices,
skip_digestion=True)

atom_id_array = []
atom_name_array = []
group_index_array = []
group_id_array = []
group_name_array = []
chain_index_array = []
chain_name_array = []

occupancy_array = []
alternate_location_array = []
coordinates_array = []

group_index = -1
former_group_id = None

chain_index = -1
former_chain_name = None
aux_dict_chain = {}

for atom_record in item.entry.coordinate.model[0].record:

atom_id_array.append(atom_record.serial)
atom_name_array.append(atom_record.name)

occupancy_array.append(atom_record.occupancy)
alternate_location_array.append(atom_record.altLoc)

if former_group_id!=atom_record.resSeq:
group_id_array.append(int(atom_record.resSeq))
group_name_array.append(atom_record.resName)
group_index += 1
former_group_id = atom_record.resSeq

group_index_array.append(group_index)

if former_chain_name!=atom_record.chainId:
if atom_record.chainId in aux_dict_chain:
chain_index = aux_dict_chain[atom_record.chainId]
else:
chain_index += 1
aux_dict_chain[atom_record.chainId]=chain_index
chain_name_array.append(atom_record.chainId)

chain_index_array.append(chain_index)

coordinates_array.append([atom_record.x, atom_record.y, atom_record.z])

atom_id_array = np.array(atom_id_array, dtype=int)
atom_name_array = np.array(atom_name_array, dtype=str)
group_index_array = np.array(group_index_array, dtype=int)
chain_index_array = np.array(chain_index_array, dtype=int)
group_id_array = np.array(group_id_array, dtype=int)
group_name_array = np.array(group_name_array, dtype=str)
chain_name_array = np.array(chain_name_array, dtype=str)

occupancy_array = np.array(occupancy_array, dtype=float)
alternate_location_array = np.array(alternate_location_array, dtype=str)

alt_atom_indices = np.where(alternate_location_array!=' ')[0]
aux_dict = {}

if len(alt_atom_indices):

alt_atom_names = atom_name_array[alt_atom_indices].to_numpy()
alt_group_index = tmp_item.atoms["group_index"][alt_atom_indices].to_numpy()
alt_chain_index = tmp_item.atoms["chain_index"][alt_atom_indices].to_numpy()
for aux_atom_index, aux_atom_name, aux_group_index, aux_chain_index in zip(alt_atom_indices,
alt_atom_names, alt_group_index,
alt_chain_index):
aux_key = tuple([aux_atom_name, aux_group_index, aux_chain_index])
if aux_key in aux_dict:
aux_dict[aux_key].append(aux_atom_index)
else:
aux_dict[aux_key]=[aux_atom_index]

atoms_to_be_removed_with_alt_loc=[]
for same_atoms in aux_dict.values():
alt_occupancy = occupancy[same_atoms]
alt_loc = alternate_location[same_atoms]
if np.allclose(alt_occupancy, alt_occupancy[0]):
chosen = np.where(alt_loc=='A')[0][0]
else:
chosen = np.argmax(alt_occupancy)
chosen = same_atoms.pop(chosen)
atoms_to_be_removed_with_alt_loc += same_atoms

if len(atoms_to_be_removed_with_alt_loc):
atom_id_array = np.delete(atom_id_array, atoms_to_be_removed_with_alt_loc)
atom_name_array = np.delete(atom_name_array, atoms_to_be_removed_with_alt_loc)
group_index_array = np.delete(group_index_array, atoms_to_be_removed_with_alt_loc)
chain_index_array = np.delete(chain_index_array, atoms_to_be_removed_with_alt_loc)
coordinates_array = np.delete(coordinates_array, atoms_to_be_removed_with_alt_loc)

n_atoms = atom_id_array.shape[0]
n_groups = group_name_array.shape[0]
n_chains = chain_name_array.shape[0]

tmp_item.topology.reset_atoms(n_atoms=n_atoms)
tmp_item.topology.reset_groups(n_groups=n_groups)
tmp_item.topology.reset_chains(n_chains=n_chains)

tmp_item.topology.atoms.atom_id = atom_id_array
tmp_item.topology.atoms.atom_name = atom_name_array
tmp_item.topology.atoms.group_index = group_index_array
tmp_item.topology.atoms.chain_index = chain_index_array

tmp_item.topology.rebuild_atoms(redefine_ids=False, redefine_types=True)

tmp_item.topology.groups.group_id = group_id_array
tmp_item.topology.groups.group_name = group_name_array

tmp_item.topology.rebuild_groups(redefine_ids=False, redefine_types=True)

tmp_item.topology.chains.chain_name = chain_name_array

tmp_item.topology.rebuild_chains(redefine_ids=True, redefine_types=False)

cryst1 = item.entry.crystallographic_and_coordinate_transformation.cryst1
box_lengths = puw.quantity([[cryst1.a, cryst1.b, cryst1.c]], 'angstroms')
box_angles = puw.quantity([[cryst1.alpha, cryst1.beta, cryst1.gamma]], 'degrees')
print(box_lengths, box_angles)
box = get_box_from_lengths_and_angles(box_lengths, box_angles)
print(box)

if get_missing_bonds:

bonds = _get_missing_bonds(tmp_item)
bonds = np.array(bonds)
tmp_item.topology.reset_bonds(n_bonds=bonds.shape[0])
tmp_item.topology.bonds.drop(['order', 'type'], axis=1, inplace=True)
tmp_item.topology.bonds.atom1_index=bonds[:,0]
tmp_item.topology.bonds.atom2_index=bonds[:,1]

tmp_item.topology.rebuild_components()
tmp_item.topology.rebuild_molecules()
tmp_item.topology.rebuild_chains(redefine_ids=False, redefine_types=True)
tmp_item.topology.rebuild_entities()

tmp_item = tmp_item.extract(atom_indices=atom_indices, copy_if_all=False, skip_digestion=True)

return tmp_item

49 changes: 47 additions & 2 deletions molsysmt/form/molsysmt_PDBFileHandler/to_molsysmt_Topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
def to_molsysmt_Topology(item, atom_indices='all', get_missing_bonds=False, skip_digestion=False):

from molsysmt.native import Topology
from molsysmt.build import get_missing_bonds as _get_missing_bonds

tmp_item = Topology()

Expand All @@ -15,7 +16,8 @@ def to_molsysmt_Topology(item, atom_indices='all', get_missing_bonds=False, skip
group_index_array = []
group_id_array = []
group_name_array = []
chain_id_array = []
chain_index_array = []
chain_name_array = []

occupancy_array = []
alternate_location_array = []
Expand Down Expand Up @@ -51,7 +53,7 @@ def to_molsysmt_Topology(item, atom_indices='all', get_missing_bonds=False, skip
else:
chain_index += 1
aux_dict_chain[atom_record.chainId]=chain_index
chain_id_array.append(atom_record.chainId)
chain_name_array.append(atom_record.chainId)

chain_index_array.append(chain_index)

Expand Down Expand Up @@ -94,7 +96,50 @@ def to_molsysmt_Topology(item, atom_indices='all', get_missing_bonds=False, skip
chosen = same_atoms.pop(chosen)
atoms_to_be_removed_with_alt_loc += same_atoms

atom_id_array = np.delete(atom_id_array, atoms_to_be_removed_with_alt_loc)
atom_name_array = np.delete(atom_name_array, atoms_to_be_removed_with_alt_loc)
group_index_array = np.delete(group_index_array, atoms_to_be_removed_with_alt_loc)
chain_index_array = np.delete(chain_index_array, atoms_to_be_removed_with_alt_loc)

n_atoms = atom_id_array.shape[0]
n_groups = group_name_array.shape[0]
n_chains = chain_name_array.shape[0]

tmp_item.reset_atoms(n_atoms=n_atoms)
tmp_item.reset_groups(n_groups=n_groups)
tmp_item.reset_chains(n_chains=n_chains)

tmp_item.atoms.atom_id = atom_id_array
tmp_item.atoms.atom_name = atom_name_array
tmp_item.atoms.group_index = group_index_array
tmp_item.atoms.chain_index = chain_index_array

tmp_item.rebuild_atoms(redefine_ids=False, redefine_types=True)

tmp_item.groups.group_id = group_id_array
tmp_item.groups.group_name = group_name_array

tmp_item.rebuild_groups(redefine_ids=False, redefine_types=True)

tmp_item.chains.chain_name = chain_name_array

tmp_item.rebuild_chains(redefine_ids=True, redefine_types=False )

if get_missing_bonds:

bonds = _get_missing_bonds(tmp_item, with_distances=False)
bonds = np.array(bonds)
tmp_item.reset_bonds(n_bonds=bonds.shape[0])
tmp_item.bonds.drop(['order', 'type'], axis=1, inplace=True)
tmp_item.bonds.atom1_index=bonds[:,0]
tmp_item.bonds.atom2_index=bonds[:,1]

tmp_item.rebuild_components()
tmp_item.rebuild_molecules()
tmp_item.rebuild_chains(redefine_ids=False, redefine_types=True)
tmp_item.rebuild_entities()

tmp_item = tmp_item.extract(atom_indices=atom_indices, copy_if_all=False, skip_digestion=True)

return tmp_item

15 changes: 2 additions & 13 deletions molsysmt/native/pdb_file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,7 +900,7 @@ def parse_format33(file):
cryst1.alpha = float(line[33:40])
cryst1.beta = float(line[40:47])
cryst1.gamma = float(line[47:54])
cryst1.gamma = line[55:66].strip()
cryst1.sGroup = line[55:66].strip()

counter += 1
line = lines[counter]
Expand Down Expand Up @@ -1183,6 +1183,7 @@ def __init__(self, filename, io_mode='r', closed=False, skip_digestion=False):

self.file = open(filename, "r")
self.format_version = guess_format_version(self.file)
self.load()

else:

Expand All @@ -1205,15 +1206,3 @@ def dump(self):

pass

def get_atoms_with_alternate_locations(self):

output = {}

for model in self.entry.coordinate.model:
output[model.serial]=[]
for record in model.record:
if record.altLoc != ' ':
output[model.serial].append(deepcopy(record))

return output

Loading

0 comments on commit 8d3aab8

Please sign in to comment.