# Source code for mmtfPyspark.mappers.structureToSecondaryStructureSegments
#!/usr/bin/env python
'''structureToSecondaryStructureSegments.py:
Maps a chain sequence to its fixed-length sequence segments
'''
# Module metadata.
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "done"
from pyspark.sql import Row
class StructureToSecondaryStructureSegments(object):
    '''Maps a chain record to fixed-length sequence segments.

    Each emitted segment carries the Q8 and Q3 labels found at the
    segment's center position.

    Attributes
    ----------
    length : int
        length of the secondary structure segments
    '''

    def __init__(self, length):
        '''Constructor sets the segment length.

        Parameters
        ----------
        length : int
            length of each segment. An odd value is expected; any other
            value is only reported on stdout and is still stored.
        '''
        if length % 2 != 1:
            print("length has to be an odd number")
        self.length = length

    def __call__(self, t):
        '''Split one record into labelled sequence segments.

        Parameters
        ----------
        t : sequence
            record read by position: ``t[0]`` is the structure chain id,
            ``t[1]`` the sequence, ``t[5]`` the Q8 labels and ``t[6]``
            the Q3 labels (presumably DSSP assignment strings aligned
            with the sequence -- confirm against the caller).

        Returns
        -------
        list
            one ``Row(structureChainId, segment, labelQ8, labelQ3)`` per
            window whose center labels are both different from "X".
        '''
        structureChainId = t[0]
        sequence = t[1]
        dsspQ3 = t[6]
        dsspQ8 = t[5]

        # Offset of the center position inside a window; loop-invariant.
        half = self.length // 2

        sequences = []
        # NOTE(review): this range never yields the window that starts at
        # len(sequence) - length, i.e. the last full-length segment. Kept
        # as is so the number of emitted rows does not change -- confirm
        # whether skipping that window is intended.
        for i in range(len(sequence) - self.length):
            center = i + half
            # One-element slices (not indexing) so that a label string
            # shorter than the sequence yields "" instead of raising.
            labelQ3 = dsspQ3[center:center + 1]
            labelQ8 = dsspQ8[center:center + 1]
            if labelQ8 != "X" and labelQ3 != "X":
                currSeq = sequence[i:i + self.length]
                sequences.append(
                    Row(structureChainId, currSeq, labelQ8, labelQ3))
        return sequences