Example demonstrating how to extract protein dimers from PDB entries. This example uses a flatMap function to transform a structure into its dimers.
In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.mappers import StructureToProteinDimers, StructureToBioassembly
In [2]:
# Spark configuration for this demo: master "local[*]" and an application
# name that identifies the notebook.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("MapToProteinDimersDemo")
)
# getOrCreate() returns the already-running SparkContext if there is one,
# so re-executing this cell does not fail with
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)
In [3]:
# PDB entries to load for this demo (a single entry, 1STP).
pdb_ids = ["1STP"]
# Fetch the listed entries through mmtfReader using the Spark context `sc`.
protein = mmtfReader.download_mmtf_files(pdb_ids, sc)
In [4]:
# Settings handed to StructureToProteinDimers below.
# NOTE(review): the meanings are inferred from the names only -- presumably a
# contact distance cutoff, a minimum number of contacts, whether all atoms are
# considered, and whether a chain may belong to only one dimer. Confirm
# against the mmtfPyspark documentation.
cutoffDistance = 8.0
contacts = 20
useAllAtoms = False
exclusive = True

# First flatMap: expand each loaded structure with StructureToBioassembly.
bioassemblies = protein.flatMap(StructureToBioassembly())

# Second flatMap: turn each result of the first step into its protein dimers.
dimer_mapper = StructureToProteinDimers(
    cutoffDistance,
    contacts,
    useAllAtoms,
    exclusive,
)
dimers = bioassemblies.flatMap(dimer_mapper)
In [5]:
# count() is the number of elements in `dimers`, i.e. one per dimer produced
# by the flatMap chain.
num_dimer_structures = dimers.count()
print(f"Number of structures : {num_dimer_structures}")
Number of structures : 2
In [6]:
# Stop the SparkContext created at the top of the notebook now that the demo
# is finished.
sc.stop()