This example shows how to filter PDB structures by experimental method (X-ray diffraction) and collect information about each result (PDB ID, resolution, R-free, R-work) into a list.
In [8]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.filters import ExperimentalMethods
from mmtfPyspark.io import mmtfReader
In [9]:
# Build the Spark configuration: "local[*]" as the master URL and
# "MapToListDemo" as the application name.
conf = SparkConf().setMaster("local[*]").setAppName("MapToListDemo")

# Use getOrCreate instead of the bare constructor so this cell can be re-run
# in the same kernel: constructing a second SparkContext while one is active
# raises "ValueError: Cannot run multiple SparkContexts at once".
# NOTE: when a context already exists it is returned as-is and `conf` is
# not re-applied to it.
sc = SparkContext.getOrCreate(conf=conf)
In [10]:
# Sampling controls handed to the reader below (presumably: the fraction of
# entries to keep, and the random seed used to pick them -- confirm against
# the mmtfReader documentation).
seed = 123
fraction = 0.001

# Directory holding the sample data, relative to this notebook.
path = "../../resources/mmtf_full_sample/"

# Read the sampled entries using the SparkContext `sc`.
pdb = mmtfReader.read_sequence_file(
    path,
    sc,
    fraction=fraction,
    seed=seed,
)
In [11]:
# Keep only the entries accepted by the X_RAY_DIFFRACTION experimental-method
# filter; the result replaces `pdb` for the cells that follow.
pdb = pdb.filter(
    ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION)
)
In [12]:
# Turn every element into [entry[0], resolution, r_free, r_work] and pull the
# results back to the driver as a Python list. entry[0] is the identifier
# shown first in each output row; entry[1] is the object carrying the
# resolution / r_free / r_work attributes.
pdb.map(
    lambda entry: [
        entry[0],
        entry[1].resolution,
        entry[1].r_free,
        entry[1].r_work,
    ]
).collect()
Out[12]:
[['4DOI', 1.5499999523162842, 0.20280000567436218, 0.18170000612735748],
['2QNI', 1.7999999523162842, 0.22066999971866608, 0.19931000471115112],
['3RNQ', 1.600000023841858, 0.20679999887943268, 0.1826999932527542],
['2W6A', 1.399999976158142, 0.19900000095367432, 0.16099999845027924],
['4QKW', 1.7000000476837158, 0.24490000307559967, 0.21230000257492065],
['4CVO', 1.850000023841858, 0.25529998540878296, 0.23639999330043793],
['2V0Z', 2.200000047683716, 0.26600000262260437, 0.20499999821186066],
['1MNN', 1.399999976158142, 0.20635999739170074, 0.19483999907970428],
['3CLO', 2.0399999618530273, 0.21699999272823334, 0.18700000643730164],
['1GVP', 1.600000023841858, 0.2879999876022339, 0.20900000631809235]]
In [13]:
# Stop the SparkContext now that the example is finished.
sc.stop()