Not Filter Demo

Example how to wrap a filter in a not filter to negate a filter

Imports

In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.filters import ContainsDnaChain, ContainsLProteinChain, NotFilter
from mmtfPyspark.structureViewer import view_structure

Configure Spark

In [6]:
conf = SparkConf().setMaster("local[*]") \
                      .setAppName("notFilterExample")
sc = SparkContext(conf = conf)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-77ae329f866f> in <module>()
      1 conf = SparkConf().setMaster("local[*]")                       .setAppName("notFilterExample")
----> 2 sc = SparkContext(conf = conf)

/srv/spark/python/pyspark/context.py in __init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
    113         """
    114         self._callsite = first_spark_call() or CallSite(None, None, None)
--> 115         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    116         try:
    117             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,

/srv/spark/python/pyspark/context.py in _ensure_initialized(cls, instance, gateway, conf)
    273                         " created by %s at %s:%s "
    274                         % (currentAppName, currentMaster,
--> 275                             callsite.function, callsite.file, callsite.linenum))
    276                 else:
    277                     SparkContext._active_spark_context = instance

ValueError: Cannot run multiple SparkContexts at once; existing SparkContext(app=notFilterExample, master=local[*]) created by __init__ at <ipython-input-2-77ae329f866f>:2

Read in MMTF Files

In [7]:
path = "../../resources/mmtf_reduced_sample/"

pdb = mmtfReader.read_sequence_file(path, sc)

Filter by contains L Protein Chain

In [8]:
structures = pdb.filter(ContainsLProteinChain())

Using Not filter to reverse a filter

Get entires that does not contain DNA Chains

In [9]:
structures = structures.filter(NotFilter(ContainsDnaChain()))

Count number of entires

In [10]:
count = structures.count()

print(f"PDB entires without DNA chains : {count}")
PDB entires without DNA chains : 4820

Visualize Structures

In [11]:
view_structure(structures.keys().collect())
Out[11]:
<function mmtfPyspark.structureViewer.view_structure.<locals>.view3d>

Terminate Spark

In [7]:
sc.stop()