Map To List Demo

This example shows how to filter PDB structures by experimental method (X-ray diffraction) and store information about each result (PDB ID, resolution, R-free, R-work) in a list.

Imports

In [8]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.filters import ExperimentalMethods
from mmtfPyspark.io import mmtfReader

Configure Spark

In [9]:
# Configure a Spark application named "MapToListDemo" that runs locally
# ("local[*]" master).
conf = SparkConf().setMaster("local[*]") \
                  .setAppName("MapToListDemo")
# getOrCreate returns the already-active SparkContext if one exists, so
# re-running this cell (or running it in a kernel that already holds a
# context) does not fail with "Cannot run multiple SparkContexts at once".
# On a fresh kernel it creates a new context from `conf`, as before.
# NOTE: when an existing context is reused, `conf` is not applied to it.
sc = SparkContext.getOrCreate(conf=conf)

Read in MMTF Files and sample a small fraction

In [10]:
# Relative path to the MMTF sequence-file sample to read.
path = "../../resources/mmtf_full_sample/"
# Sampling parameters forwarded to the reader: the fraction of entries to
# keep and the seed used for the sampling.
fraction = 0.001
seed = 123

# Read the sequence file through the SparkContext `sc`, sampling as it loads.
pdb = mmtfReader.read_sequence_file(
    path,
    sc,
    fraction=fraction,
    seed=seed,
)

Filter by X-Ray Diffraction experimental method

In [11]:
# Build the experimental-method filter once, then keep only the entries
# it accepts (X-ray diffraction).
xray_filter = ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION)
pdb = pdb.filter(xray_filter)

Map each result to a list of information (ID, resolution, R-free, R-work), then collect and display the lists

In [12]:
def _structure_summary(entry):
    """Return [id, resolution, r_free, r_work] for one (id, structure) entry.

    `entry[0]` is used as the identifier and `entry[1]` is the object whose
    `resolution`, `r_free` and `r_work` attributes are read.
    """
    structure_id = entry[0]
    structure = entry[1]
    return [
        structure_id,
        structure.resolution,
        structure.r_free,
        structure.r_work,
    ]


# Apply the summary to every entry and bring the results back to the driver;
# the collected list is the cell's displayed output.
pdb.map(_structure_summary).collect()
Out[12]:
[['4DOI', 1.5499999523162842, 0.20280000567436218, 0.18170000612735748],
 ['2QNI', 1.7999999523162842, 0.22066999971866608, 0.19931000471115112],
 ['3RNQ', 1.600000023841858, 0.20679999887943268, 0.1826999932527542],
 ['2W6A', 1.399999976158142, 0.19900000095367432, 0.16099999845027924],
 ['4QKW', 1.7000000476837158, 0.24490000307559967, 0.21230000257492065],
 ['4CVO', 1.850000023841858, 0.25529998540878296, 0.23639999330043793],
 ['2V0Z', 2.200000047683716, 0.26600000262260437, 0.20499999821186066],
 ['1MNN', 1.399999976158142, 0.20635999739170074, 0.19483999907970428],
 ['3CLO', 2.0399999618530273, 0.21699999272823334, 0.18700000643730164],
 ['1GVP', 1.600000023841858, 0.2879999876022339, 0.20900000631809235]]

Terminate Spark

In [13]:
# Stop the SparkContext `sc` that was created in the "Configure Spark" cell.
sc.stop()