Source code for mmtfPyspark.interactions.ligandInteractionFingerprint
#!/user/bin/env python
'''ligandInteractionFIngerprint.py
Finds interactions between ligands and polymer chains and maps them onto polymer sequences.
'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "done"
from mmtfPyspark.interactions import InteractionFilter, AtomInteraction, InteractionCenter
from mmtfPyspark.utils import ColumnarStructure
from mmtfPyspark.utils import DistanceBox
from pyspark.sql import Row
import numpy as np
[docs]class LigandInteractionFingerprint(object):
def __init__(self, interactionFilter):
self.filter = interactionFilter
def __call__(self, t):
structureId = t[0]
structure = t[1]
return self.get_interactions(structureId, structure)
[docs] def get_interactions(self, structureId, structure):
rows = []
cutoffDistanceSquared = self.filter.get_distance_cutoff() ** 2
arrays = ColumnarStructure(structure, True)
chainNames = arrays.get_chain_names()
groupNames = arrays.get_group_names()
groupNumbers = arrays.get_group_numbers()
atomNames = arrays.get_atom_names()
entityIndices = arrays.get_entity_indices()
elements = arrays.get_elements()
polymer = arrays.is_polymer()
sequenceMapIndices = arrays.get_sequence_positions()
x = arrays.get_x_coords()
y = arrays.get_y_coords()
z = arrays.get_z_coords()
# create a distance box for quick lookup interactions of polymer atoms
# of the specified elements
box = DistanceBox(self.filter.get_distance_cutoff())
for i in range(arrays.get_num_atoms()):
if polymer[i] \
and self.filter.is_target_group(groupNames[i]) \
and self.filter.is_target_atom_name(atomNames[i]) \
and self.filter.is_target_element(elements[i]) \
and not self.filter.is_prohibited_target_group(groupNames[i]):
newPoint = np.array([x[i],y[i],z[i]])
box.add_point(newPoint, i)
groupToAtomIndices = arrays.get_group_to_atom_indices()
for g in range(arrays.get_num_groups()):
# position of first and last atom +1 in group
start = groupToAtomIndices[g]
end = groupToAtomIndices[g+1]
# skip polymer groups
if polymer[start]:
continue
# the specified filter conditions (some groups may be excluded,
# e.g. water)
if self.filter.is_query_group(groupNames[start]):
print(groupNames[start])
# create list of atoms that interact within the cutoff distance
neighbors = []
for a in range(start,end):
if self.filter.is_query_atom_name(atomNames[a]) \
and self.filter.is_query_element(elements[a]):
p = np.array([x[a], y[a], z[a]])
# loop up neighbors that are within a cubic
for j in box.get_neighbors(p):
dx = x[j] - x[a]
dy = y[j] - y[a]
dz = z[j] - z[a]
dSq = dx * dx + dy * dy + dz * dz
if dSq <= cutoffDistanceSquared:
neighbors.append(j)
if len(neighbors) == 0:
continue
interactions2 = {}
for neighbor in neighbors:
if chainNames[neighbor] not in interactions2:
interactions2[chainNames[neighbor]] = []
# keep track of which group is interacting
seqPos = sequenceMapIndices[neighbor]
# non-polymer groups have a negative index and are exlcuded here
if seqPos > 0:
l = [seqPos, groupNumbers[neighbor], entityIndices[neighbor]]
interactions2[chainNames[neighbor]].append(l)
for key, val in interactions2.items():
sequenceIndices = set()
residueNames = set()
sequence = None
for v in val:
sequenceIndices.add(int(v[0]))
residueNames.add(int(v[1]))
if sequence is None:
sequence = structure.entity_list[v[2]]['sequence']
if len(sequenceIndices) > 0:
rows.append(Row(structureId + "." + key, groupNames[start], \
groupNumbers[start], chainNames[start], \
key, sorted(list(residueNames)), \
sorted(list(sequenceIndices)), sequence,\
len(interactions2)))
return rows