This demo shows how to use Map and Reduce to count the total number of atoms in the PDB.
In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.io import mmtfReader
In [2]:
# Configure Spark to run on the local machine ("local[*]" master) under the
# application name "MapReduceExample".
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("MapReduceExample")
)
# getOrCreate reuses an already-running SparkContext instead of constructing a
# second one, so re-executing this cell in the same kernel does not raise
# "Cannot run multiple SparkContexts at once".
sc = SparkContext.getOrCreate(conf=conf)
In [3]:
# Relative location of the input data (resolved against the notebook's working
# directory). Presumably an MMTF sample in Hadoop sequence-file form -- confirm.
path = "../../resources/mmtf_full_sample/"
# Load the data through the SparkContext `sc`. The next cell calls `.map` on the
# result and reads `t[1].num_atoms`, i.e. each element is indexable and its
# second item exposes a `num_atoms` attribute.
pdb = mmtfReader.read_sequence_file(path, sc)
In [4]:
# Map step: project every element of `pdb` onto the atom count of its second item.
atoms_per_entry = pdb.map(lambda entry: entry[1].num_atoms)
# Reduce step: combine the per-entry counts pairwise into one grand total.
numAtoms = atoms_per_entry.reduce(lambda left, right: left + right)
print(f"Total number of atoms in PDB: {numAtoms}")
Total number of atoms in PDB: 29059439
In [5]:
# Stop the SparkContext now that the computation has finished.
sc.stop()