Source code for mmtfPyspark.mappers.structureToSecondaryStructureElements

#!/usr/bin/env python
'''structureToSecondaryStructureElements.py:

Maps each chain sequence to the sequence segments that carry a given
secondary structure (DSSP Q3) label.

'''
# Module metadata: authorship, contact address, release version and status.
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "done"
from pyspark.sql import Row


class StructureToSecondaryStructureElements(object):
    '''Callable that extracts sequence segments sharing one structure label.

    An instance is called with one row at a time and returns every maximal
    run of consecutive positions whose secondary structure code equals
    ``label``, provided the run is at least ``length`` positions long.

    Attributes
    ----------
    label : str
       label of the structure; it is compared against one character of the
       secondary structure string at a time, so it should be a single
       character
    length : int
       minimum segment length [4]
    '''

    def __init__(self, label, length=4):
        self.label = label
        self.length = length

    def __call__(self, t):
        '''Return the qualifying segments of one row.

        Parameters
        ----------
        t : sequence
           indexable row; element 1 is read as the chain sequence and
           element 6 as the per-residue secondary structure string
           (presumably DSSP Q3 codes - confirm against the dataset schema)

        Returns
        -------
        list
           one ``Row(segment, label)`` per maximal run of ``label`` that is
           at least ``length`` long, in order of occurrence
        '''
        sequence = t[1]
        dssp_q3 = t[6]
        label = self.label
        min_length = self.length
        n_residues = len(sequence)

        segments = []
        start = 0
        while start < n_residues:
            # Extend the run while the structure code matches. Slicing
            # (rather than indexing) yields "" past the end of a structure
            # string shorter than the sequence, which ends the run instead
            # of raising IndexError.
            end = start
            while end < n_residues and dssp_q3[end:end + 1] == label:
                end += 1

            if end - start >= min_length:
                segments.append(Row(sequence[start:end], label))

            # Resume just after the residue that terminated the run. The
            # index must advance exactly once here: skipping one more
            # position could jump over the first residue of the next run
            # and truncate or drop that segment.
            start = end + 1

        return segments