Source code for mmtfPyspark.interactions.atomInteraction

#!/usr/bin/env python
'''atomInteraction.py:

AtomInteraction contains interaction information of a central atom with its
interacting neighbors (coordination sphere). Once this data structure is filled
with interaction centers, this class calculates various geometric properties
such as distance, angles, and order parameters for the interacting atoms.
Finally, it provides methods for creating row-wise representations of the data
in Spark Datasets.

'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "done"

from mmtfPyspark.interactions import CoordinateGeometry, InteractionCenter
from pyspark.sql import Row
from pyspark.sql.types import *
import numpy as np


class AtomInteraction(object):
    '''Interaction data of a central atom and its coordination sphere.

    An instance holds a structure identifier, one central interaction
    center and a list of neighboring interaction centers. From these it
    derives distances, angles and orientational order parameters, and it
    can flatten everything into Spark rows together with matching schemas.
    '''

    # Orientational order parameters. They stay None until
    # calc_coordination_geometry() sees enough neighbors to compute them.
    q3 = None
    q4 = None
    q5 = None
    q6 = None
    # Distances and angles, filled by calc_coordination_geometry().
    distances = None
    angles = None

    def __init__(self):
        # The accessors below read/write ``structureId``; initialise that
        # name so get_structure_id() works before set_structure_id().
        self.structureId = None
        self.center = None
        self.neighbors = []

    @property
    def structure_id(self):
        '''Snake-case alias of ``structureId``, kept for compatibility.'''
        return self.structureId

    @structure_id.setter
    def structure_id(self, value):
        self.structureId = value

    def set_structure_id(self, structureId):
        '''Sets the structure identifier

        Parameters
        ----------
        structureId : str
           the structure identifier
        '''
        self.structureId = structureId

    def get_structure_id(self):
        '''Returns the structure identifier

        Returns
        -------
        str
           structure identifier, or None if it has not been set
        '''
        return self.structureId

    def set_center(self, center):
        '''Sets the central atom information of a coordination sphere.

        Parameters
        ----------
        center : InteractionCenter
           central atom information
        '''
        self.center = center

    def get_center(self):
        '''Returns information about the central atom of a coordination sphere

        Returns
        -------
        InteractionCenter
           central atom information, or None if it has not been set
        '''
        return self.center

    def add_neighbor(self, neighbor):
        '''Adds a neighbor interaction center.

        Parameters
        ----------
        neighbor : InteractionCenter
           an interaction with the central atom
        '''
        self.neighbors.append(neighbor)

    def get_interactions(self):
        '''Returns information about the interacting neighbor atoms.

        Returns
        -------
        list
           interaction centers
        '''
        return self.neighbors

    def get_num_interactions(self):
        '''Returns the number of neighboring atoms that interact with the
        central atom

        Returns
        -------
        int
           number of neighboring atoms that interact with the central atom
        '''
        return len(self.neighbors)

    def calc_coordination_geometry(self, maxInteraction):
        '''Calculates geometric properties of the coordination sphere.

        The geometric properties include orientational order parameters
        that describe the arrangement of the atoms in the coordination
        sphere, and the distances and angles of the neighbor atoms with
        the center atom.

        ``self.distances`` ends up with exactly ``maxInteraction`` entries
        (padded with 0.0) and ``self.angles`` with
        ``maxInteraction * (maxInteraction - 1) / 2`` entries (padded with
        NaN). Surplus values are dropped in both cases.

        Parameters
        ----------
        maxInteraction : int
           maximum number of interactions
        '''
        # Neighbors without coordinates (e.g. padding entries) are skipped.
        neighborPoints = [
            coords
            for coords in (n.get_coordinates() for n in self.neighbors)
            if coords is not None
        ]

        geom = CoordinateGeometry(self.center.get_coordinates(),
                                  neighborPoints)

        # Distances to the central atom, truncated/padded to a fixed width.
        distances = list(geom.get_distance())[:maxInteraction]
        self.distances = distances + [0.0] * (maxInteraction - len(distances))

        # Angles among all interacting atoms with the central atom.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        numInteraction = maxInteraction * (maxInteraction - 1) // 2
        angles = list(geom.get_angles())[:numInteraction]
        self.angles = angles + [np.nan] * (numInteraction - len(angles))

        # Reset first so a recomputation with fewer neighbors does not
        # leave stale order parameters behind.
        self.q3 = self.q4 = self.q5 = self.q6 = None

        # TODO: points or neighbor points
        numPoints = len(neighborPoints)
        if numPoints > 2:
            self.q3 = geom.q3()
        if numPoints > 3:
            self.q4 = geom.q4()
        if numPoints > 4:
            self.q5 = geom.q5()
        if numPoints > 5:
            self.q6 = geom.q6()

    def get_multiple_interactions_as_row(self, maxInteractions):
        '''Returns interactions and geometric information in a single row

        The neighbor list is padded in place with empty interaction
        centers until it holds ``maxInteractions`` entries. If it holds
        more, only the first ``maxInteractions`` neighbors are written so
        the row keeps the width described by ``get_schema``.

        Parameters
        ----------
        maxInteractions : int
           maximum number of interactions

        Returns
        -------
        pyspark.sql.Row
           row of interactions and geometric information, one field per
           column of ``get_schema(maxInteractions)``
        '''
        # Pad so every row has the same number of interaction columns.
        while self.get_num_interactions() < maxInteractions:
            self.neighbors.append(InteractionCenter())

        self.length = InteractionCenter.get_length()

        self.calc_coordination_geometry(maxInteractions)

        data = [self.structureId, self._get_number_of_polymer_chains(),
                self.q3, self.q4, self.q5, self.q6]

        # Copy data for query atom
        data += self.center.get_as_object()

        # Copy data for interaction atoms; self.distances has exactly
        # maxInteractions entries, so zip caps the neighbors accordingly.
        for neighbor, distance in zip(self.neighbors, self.distances):
            data += neighbor.get_as_object()
            data.append(distance)

        data += self.angles

        # Unpack: Row(data) would create a one-field row holding a list,
        # which cannot match the per-value schema from get_schema().
        return Row(*data)

    def get_pair_interactions_as_rows(self):
        '''Return rows of pairwise interactions with the central atom

        Each row has the layout of ``get_pair_interaction_schema``:
        structure id, central atom fields, neighbor atom fields, distance.

        Returns
        -------
        list
           rows of pairwise interactions with the central atom
        '''
        self.calc_coordination_geometry(len(self.neighbors))

        centerData = list(self.center.get_as_object())

        rows = []
        # NOTE(review): distances are computed only for neighbors that have
        # coordinates; this pairing assumes coordinate-less neighbors (if
        # any) sit at the end of the list -- confirm against callers.
        for neighbor, distance in zip(self.neighbors, self.distances):
            data = [self.structureId]
            data += centerData
            data += neighbor.get_as_object()
            data.append(distance)
            rows.append(Row(*data))

        return rows

    def get_schema(self, maxInteractions):
        '''Returns the schema for a row of atom interaction information.

        The schema is used to create a Dataset<Row> from the row
        information.

        Parameters
        ----------
        maxInteractions : int
           maximum number of interactions

        Returns
        -------
        pyspark.sql.types.StructType
           schema for dataset
        '''
        sf = [
            StructField("pdbId", StringType(), False),
            StructField("polyChains", IntegerType(), False),
            StructField("q3", FloatType(), True),
            StructField("q4", FloatType(), True),
            StructField("q5", FloatType(), True),
            StructField("q6", FloatType(), True),
        ]

        # Copy schema for query atom
        sf += InteractionCenter.get_struct_fields(0)

        # Copy schema info for interacting atoms and their distances
        for i in range(1, maxInteractions + 1):
            sf += InteractionCenter.get_struct_fields(i)
            sf.append(StructField(f"distance{i}", FloatType(), True))

        # Add schema for angles: one column per unordered neighbor pair
        for i in range(1, maxInteractions):
            for j in range(i + 1, maxInteractions + 1):
                sf.append(StructField(f"angle{i}-{j}", FloatType(), True))

        return StructType(sf)

    def get_pair_interaction_schema(self):
        '''Returns the schema for a row of pairwise atom interactions.

        The schema is used to create a Dataset<Row> from the row
        information.

        Returns
        -------
        pyspark.sql.types.StructType
           schema for dataset
        '''
        sf = [StructField("pdbId", StringType(), False)]

        # Copy schema info for query atom
        sf += InteractionCenter.get_struct_fields(0)

        # Copy schema info for the interacting atom and its distance
        sf += InteractionCenter.get_struct_fields(1)
        sf.append(StructField("distance1", FloatType(), True))

        return StructType(sf)

    def _get_number_of_polymer_chains(self):
        '''Returns the number of unique polymer chains in the coordination
        sphere.

        Only neighbors with an atom name and a non-negative sequence
        position are counted.

        Returns
        -------
        int
           number of unique polymer chains in the coordination sphere.
        '''
        # get_chain_name must be *called*; a set of bound methods would
        # count neighbors rather than distinct chain names.
        return len({
            center.get_chain_name()
            for center in self.neighbors
            if center.atomName is not None
            and center.get_sequence_position() >= 0
        })

    def _get_num_columns(self, maxInteractions):
        '''Returns the number of columns in a Row

        The number of columns:
            structureId + polymerChains + q3 + q4 + q5 + q6: 6
            query + interaction centers: (maxInteractions + 1) * length
            distances: maxInteractions
            angles: maxInteractions * (maxInteractions - 1) / 2

        Parameters
        ----------
        maxInteractions : int
           maximum number of interactions

        Returns
        -------
        int
           number of columns in a row
        '''
        # self.length is only set by get_multiple_interactions_as_row();
        # fall back to asking InteractionCenter directly.
        length = getattr(self, "length", None)
        if length is None:
            length = InteractionCenter.get_length()

        numAngles = maxInteractions * (maxInteractions - 1) // 2
        # Distances and angles are separate column groups: add, not multiply.
        return int(6 + (maxInteractions + 1) * length
                   + maxInteractions + numAngles)

    def _set(self, data, value, i):
        '''Sets the ith index of data to value and increases i by one

        Parameters
        ----------
        data : list
           list to modify in place
        value : object
           value to store at index i
        i : int
           index of data to set

        Returns
        -------
        tuple
           the (modified) data list and the next index, i + 1
        '''
        data[i] = value
        return data, i + 1