Source code for mmtfPyspark.datasets.groupInteractionExtractor

#!/usr/bin/env python
'''groupInteractionExtractor.py:

Creates a dataset of interactions of a specified group within
a cutoff distance. Groups are specified by their
Chemical Component identifier (residue name), e.g., "ZN", "ATP".

'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Obsolete"

from mmtfPyspark.ml import pythonRDDToDataset
from mmtfPyspark.utils import StructureToAllInteractions

class groupInteractionExtractor(object):
    '''Class that creates a dataset of interactions of a specified group
    within a cutoff distance

    Attributes
    ----------
    groupName : str
        name of the group to be analyzed
    distance : float
        cutoff distance
    '''

    def __init__(self, groupName, distance):
        self.groupName = groupName
        self.distance = distance

    def get_dataset(self, structures):
        '''Returns a dataset of residues that interact with specified group
        within a specified cutoff distance

        Parameters
        ----------
        structures : pythonRdd
            a set of PDB structures

        Returns
        -------
        dataset
            dataset with interacting residue and atom information
        '''

        # Expand every structure into its interaction rows; the callable is
        # configured with this extractor's group name and cutoff distance.
        rows = structures.flatMap(
            StructureToAllInteractions(self.groupName, self.distance))

        # Convert the rows to a dataset using these column names.
        colNames = ["structureId",
                    "residue1", "atom1", "element1", "index1",
                    "residue2", "atom2", "element2", "index2",
                    "distance"]
        return pythonRDDToDataset.get_dataset(rows, colNames)