This example demonstrates how to filter structures that contain specified groups (residues). Groups are specified by their one-, two-, or three-letter codes, e.g. “F”, “MG”, “ATP”.
For the full list of group codes, please refer to the PDB Chemical Component Dictionary.
In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.filters import ContainsGroup
from mmtfPyspark.structureViewer import view_structure
In [2]:
# Configure Spark to run locally ("local[*]" master) under this example's app name.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("FilterByGroupsDate")
)
# getOrCreate() reuses an already-running SparkContext instead of raising
# "Cannot run multiple SparkContexts at once", so this cell can be re-executed
# safely. NOTE: if a context already exists, it is returned as-is and `conf`
# is not applied to it.
sc = SparkContext.getOrCreate(conf=conf)
In [3]:
# Relative location of the reduced MMTF sample data; it is resolved against the
# notebook's working directory.
path = "../../resources/mmtf_reduced_sample/"
# Load the sample through mmtfReader using the SparkContext `sc`.
# NOTE(review): the result is presumably a pair RDD keyed by structure ID
# (later cells call .filter() and .keys() on it) — confirm against mmtfPyspark docs.
pdb = mmtfReader.read_sequence_file(path, sc)
In [5]:
# Keep only the entries accepted by the ContainsGroup filter for "ATP" and "MG".
# NOTE(review): the printed label ("ATP + MG") suggests both groups must be
# present in a structure — confirm against the ContainsGroup documentation.
group_filter = ContainsGroup("ATP","MG")
filtered_structures = pdb.filter(group_filter)

# count() is a Spark action: it triggers evaluation of the filter.
n_filtered = filtered_structures.count()
print(f"Number of structure with ATP + MG : {n_filtered}")
Number of structure with ATP + MG : 275
In [12]:
# Bring the keys of the filtered entries back to the driver as a Python list.
filtered_keys = filtered_structures.keys()
structure_names = filtered_keys.collect()

# Hand the collected names to the viewer; as the last expression of the cell,
# its return value is what the notebook displays.
view_structure(structure_names, style='stick')
Out[12]:
<function mmtfPyspark.structureViewer.view_structure.<locals>.view3d>
In [13]:
# Stop the SparkContext now that the example is finished.
sc.stop()