Skip to content

Commit

Permalink
layout extracting distances from matrix coordinates
Browse files Browse the repository at this point in the history
  • Loading branch information
menoliu committed Apr 15, 2024
1 parent 706b02d commit ed391ce
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 31 deletions.
17 changes: 4 additions & 13 deletions src/idpconfgen/cli_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,19 +733,10 @@ def main(
# TODO make a savepoint here (sub-database) for all
# the filtered contacts/distances
# TODO extracting distance distributions from database
# 1. Need a way to use the knowledge-based coordinates from
# `selected_contacts` and see where the exact sequence
# combinations lie (take as many as up to 2 residues on either side).
# - Need to make sure they're within the same cluster of surface
# accessible residues
# - For IDP-IDP complexes, just have up to 2 on either side as well]
# (most of the time it will be 5-mers)
# 2. For custom-contacts we would need to rescan the database for
# residue pairs
# - Allow for single residues to make contact here
# (extend 1 residue on either side)
if folded_structure:
pass
# For custom-contacts we would need to rescan the database for
# residue pairs
# - Make a d_mtx for every custom contact and align it with
# `cus_inter_res` and `cus_intra_res`


if __name__ == "__main__":
Expand Down
96 changes: 78 additions & 18 deletions src/idpconfgen/libs/libcomplex.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
"""Functions that are useful for generating dynamic complexes."""
from collections import Counter
from random import randint
from random import choices, randint

import numpy as np

from idpconfgen import log
from idpconfgen.core.definitions import pk_aa_dict
from idpconfgen.libs.libparse import has_consecutive_match
from idpconfgen.libs.libparse import (
get_string_element,
get_substring_characters,
has_consecutive_match,
)


# Labels for the two kinds of contacts handled in this module
# (presumably intramolecular vs. intermolecular -- confirm against usage).
contact_type = ["intra", "inter"]
Expand Down Expand Up @@ -727,31 +731,87 @@ def update_distance_distribution_matrix(dist, d_mtx):
return d_mtx


def reverse_position_lookup(coords, location_mtx, database):
def get_contact_distances(
        coords,
        res_combo,
        d_mtx,
        custom=False,
        folded=None
        ):
    """
    Return the set of Cα distances based on selected coordinates.

    One entry of the distance distribution stored at the selected matrix
    position is drawn at random, weighted by its number of counts.

    Parameters
    ----------
    coords : tuple of int
        The location of the contact per the contacts frequency heatmap,
        given as (y, x). For a custom contact the distribution stored at
        position (0, 0) is used and `coords` is returned as the residues.

    res_combo : tuple
        Sequence information where the first element is aligned to the
        Y-axis and the second to the X-axis. When `folded` is truthy the
        first element is handed to `get_string_element` (a list of
        strings) and the second is a sequence string; otherwise both
        elements are sequence strings. Not read for custom contacts.

    d_mtx : np.ndarray of dict
        Array of distance distributions where each value is a dictionary
        of tuple distances and number of counts.

    custom : Bool
        Custom contact or not?
        Defaults to False.

    folded : tuple of tuple
        First tuple element is the combination of chain and sequence.
        Second tuple element is the combination of residues.
        Only its truthiness is used here to select the IDP-folded case.

    Returns
    -------
    distances : tuple of float
        Cα distances for selected residues in question

    residues : tuple
        For custom contacts, `(coords[0], coords[1])`. Otherwise the two
        sub-sequences (strings) surrounding the contact.

    Raises
    ------
    IndexError
        If `folded` is truthy and the Y coordinate does not fall inside
        any of the strings in `res_combo[0]`.
    AssertionError
        In the IDP-IDP case, if either sub-sequence is not exactly as
        long as the selected tuple of distances.
    """
    # Coords should be a tuple of (y, x).
    # Custom contacts always read the distribution stored at (0, 0).
    if custom:
        x, y = 0, 0
    else:
        y, x = coords[0], coords[1]

    # NOTE(review): the matrix is indexed as [x, y] although coords is
    # given as (y, x); kept as written -- confirm against how d_mtx is built.
    dist_distribution = d_mtx[x, y]

    # Weighted draw of one tuple of distances; counts act as weights.
    distances = choices(
        list(dist_distribution.keys()),
        weights=list(dist_distribution.values()),
        k=1,
        )[0]
    nres = len(distances)

    if custom:
        return distances, (coords[0], coords[1])

    if folded:
        # For the IDP-Folded contact case
        folded_res = res_combo[0]
        idp_res = res_combo[1]

        # `get_string_element` takes (strings, index) only; passing `nres`
        # as a third argument raised TypeError. Locate the string holding
        # the contact first, then window it to at most `nres` characters.
        # NOTE(review): windowing inside the located string is the assumed
        # intent of the former three-argument call -- confirm.
        fld_segment, fld_pos = get_string_element(folded_res, y)
        if fld_segment is None:
            raise IndexError(
                f"Y coordinate {y} is outside of the folded residues."
                )
        fld_sub_seq = get_substring_characters(fld_segment, fld_pos, nres)
        idp_sub_seq = get_substring_characters(idp_res, x, nres)
        # TODO fix what if fld_sub_seq is less than idp_sub_seq?

        return distances, (fld_sub_seq, idp_sub_seq)

    # For the IDP-IDP contact case
    # NOTE(review): the first sequence is documented as Y-aligned but is
    # windowed with `x` here (and the second with `y`); kept as written.
    seq1 = res_combo[0]
    seq2 = res_combo[1]
    sub_seq1 = get_substring_characters(seq1, x, nres)
    sub_seq2 = get_substring_characters(seq2, y, nres)
    assert len(sub_seq1) == len(sub_seq2) == nres

    return distances, (sub_seq1, sub_seq2)
76 changes: 76 additions & 0 deletions src/idpconfgen/libs/libparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,3 +693,79 @@ def create_coordinate_combinations(data, modifier=0):
coordinates.append((item1 + modifier, item2 + modifier))

return coordinates


def get_string_element(strings, index):
    """
    Get the whole string in the list of strings using an index.

    The strings are treated as if they were concatenated; `index` points
    into that concatenation and the string containing it is returned
    together with the index relative to the start of that string.

    Parameters
    ----------
    strings : list of str
        List of strings

    index : int
        Global index relative to the list of strings

    Returns
    -------
    string : str
        String of interest
        Nonetype if no matches were found

    position : int
        Relative index in the string that was chosen
        Nonetype if no matches were found
    """
    # A negative index cannot point at any character. Without this guard
    # the first string would be returned with a negative position.
    if index < 0:
        return None, None

    offset = 0  # global index of the first character of `string`
    for string in strings:
        end = offset + len(string)
        if index < end:
            return string, index - offset
        offset = end

    return None, None


def get_substring_characters(string, char_position, max_chars):
    """
    Get a substring where the middle character is the target.

    A window of `max_chars // 2` characters on either side of the target
    is taken. When the window is cut short by one end of the string it is
    extended towards the other end, as far as the string allows.

    Parameters
    ----------
    string : str
        String of interest, typically expecting a sequence.

    char_position : int
        Integer position of the AA of interest.

    max_chars : int
        Maximum number of AA to return surrounding the target.

    Returns
    -------
    substring : str
        Substring from string of target that has the char
        position and maxes out the number of characters.
    """
    total = len(string)
    flank = max_chars // 2

    left = max(char_position - flank, 0)
    right = min(char_position + flank + 1, total)

    # Number of characters still missing from a full-sized window.
    shortfall = max_chars - (right - left)
    if shortfall > 0:
        if left == 0:
            # Cut at the start of the string: grow to the right.
            right = min(right + shortfall, total)
        else:
            # Cut at the end of the string: grow to the left.
            left = max(left - shortfall, 0)

    window = string[left:right]

    # An even `max_chars` gives a symmetric window that is one character
    # too long; the leading character is dropped in that case.
    if len(window) > max_chars:
        return window[1:]

    return window

0 comments on commit ed391ce

Please sign in to comment.