In [1]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.datasets import groupInteractionExtractor
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.webfilters import Pisces
In [2]:
# Spark configuration for this notebook: local master ("local[*]" lets Spark
# use as many worker threads as there are logical cores) and an application
# name for the Spark UI / logs.
conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("simpleZincInteractionDemo")
)

# Use getOrCreate rather than the bare constructor: constructing a second
# SparkContext in the same kernel raises
# "ValueError: Cannot run multiple SparkContexts at once", which makes this
# cell fail when it is re-run. getOrCreate returns the running context if one
# exists (in which case `conf` is not re-applied) and creates one otherwise.
sc = SparkContext.getOrCreate(conf)
In [3]:
# Location of the sample data, relative to the notebook's working directory.
path = "../../resources/mmtf_full_sample/"
# Load the data at `path` using the SparkContext created above.
# NOTE(review): presumably yields a distributed collection of structures
# (it is passed to `.filter(...)` and to the interaction extractor below) —
# confirm the exact return type against the mmtfReader documentation.
pdb = mmtfReader.read_sequence_file(path, sc)
In [4]:
# Cut-off values handed to the Pisces web filter.
# NOTE(review): presumably a sequence-identity threshold (percent) and a
# resolution threshold — confirm units against the Pisces documentation.
seqId = 40
resolution = 2.0

# Build the filter object first, then apply it to the loaded data.
pisces_filter = Pisces(seqId, resolution)
pdb = pdb.filter(pisces_filter)
In [5]:
# Extractor configured with a group name and a numeric cutoff.
# NOTE(review): the output further down shows residue1 == "ZN" and every
# distance below 3, so the second argument is presumably a distance cutoff —
# confirm units against the groupInteractionExtractor documentation.
finder = groupInteractionExtractor("ZN", 3)

# Build the dataset from the filtered data, then cache it because several
# later cells (show, count, groupBy) each trigger an action on it.
interaction_dataset = finder.get_dataset(pdb)
interactions = interaction_dataset.cache()
In [6]:
# Inspect the result: column schema first, then a preview of the first 20 rows.
interactions.printSchema()
interactions.show(n=20)

# Report the total number of rows in the interactions dataset.
print(f"Number of interactions: {interactions.count()}")
root
|-- structureId: string (nullable = false)
|-- residue1: string (nullable = false)
|-- atom1: string (nullable = false)
|-- element1: string (nullable = false)
|-- index1: integer (nullable = false)
|-- residue2: string (nullable = false)
|-- atom2: string (nullable = false)
|-- element2: string (nullable = false)
|-- index2: integer (nullable = false)
|-- distance: float (nullable = false)
+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
|structureId|residue1|atom1|element1|index1|residue2|atom2|element2|index2| distance|
+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
| 1FN9| ZN| ZN| Zn| 730| CYS| SG| S| 50|2.3709755|
| 1FN9| ZN| ZN| Zn| 730| CYS| SG| S| 53|2.3940797|
| 1FN9| ZN| ZN| Zn| 730| HIS| NE2| N| 70|2.2196307|
| 1FN9| ZN| ZN| Zn| 730| CYS| SG| S| 72|2.3465357|
| 1FN9| ZN| ZN| Zn| 731| CYS| SG| S| 415|2.3747551|
| 1FN9| ZN| ZN| Zn| 731| CYS| SG| S| 418|2.3680198|
| 1FN9| ZN| ZN| Zn| 731| HIS| NE2| N| 435|2.1647959|
| 1FN9| ZN| ZN| Zn| 731| CYS| SG| S| 437|2.3763454|
| 1E4M| ZN| ZN| Zn| 519| HIS| CE1| C| 53|2.9807622|
| 1E4M| ZN| ZN| Zn| 519| HIS| NE2| N| 53| 2.040789|
| 1E4M| ZN| ZN| Zn| 519| ASP| CG| C| 67| 2.754825|
| 1E4M| ZN| ZN| Zn| 519| ASP| OD1| O| 67|2.8967845|
| 1E4M| ZN| ZN| Zn| 519| ASP| OD2| O| 67|1.9672809|
| 1BF6| ZN| ZN| Zn| 582| HIS| NE2| N| 10|2.2776458|
| 1BF6| ZN| ZN| Zn| 582| HIS| NE2| N| 12|2.1644206|
| 1BF6| ZN| ZN| Zn| 582| GLU| OE2| O| 123|2.3778422|
| 1BF6| ZN| ZN| Zn| 582| ASP| OD1| O| 241| 2.41581|
| 1BF6| ZN| ZN| Zn| 583| GLU| CD| C| 123|2.7811828|
| 1BF6| ZN| ZN| Zn| 583| GLU| OE1| O| 123|2.1997967|
| 1BF6| ZN| ZN| Zn| 583| HIS| ND1| N| 156|2.2733805|
+-----------+--------+-----+--------+------+--------+-----+--------+------+---------+
only showing top 20 rows
Number of interactions: 238
In [7]:
# Count rows per interacting partner (the `residue2` column).
residue_counts = interactions.groupBy("residue2").count()

# Order by frequency, most common first, and display the top 10 entries.
residue_counts_sorted = residue_counts.sort("count", ascending = False)
residue_counts_sorted.show(10)
+--------+-----+
|residue2|count|
+--------+-----+
| HIS| 76|
| CYS| 43|
| HOH| 37|
| GLU| 34|
| ASP| 27|
| ACT| 12|
| TRP| 4|
| LYS| 2|
| VAL| 2|
| CL| 1|
+--------+-----+
In [8]:
# Shut down the SparkContext created at the top of the notebook; cells that
# use `sc` cannot be re-run after this without creating a new context.
sc.stop()