Example of using PySpark to find ligand interaction fingerprints

Demonstrates how to calculate ligand-polymer interaction data and map it to polymer chains.

Imports and variables

In [2]:
from pyspark import SparkConf, SparkContext
from mmtfPyspark.io import mmtfReader
from mmtfPyspark.interactions import InteractionFilter, InteractionFingerprinter

# Create variables
# Application name passed to SparkConf.setAppName below.
APP_NAME = "MMTF_Spark"

# Configure Spark
# "local[*]" is the Spark master URL for local mode (per the Spark docs it uses
# as many worker threads as there are logical cores on the machine).
conf = SparkConf().setAppName(APP_NAME).setMaster("local[*]")

# Use getOrCreate instead of the bare constructor so that re-running this cell
# in a live kernel reuses the active SparkContext rather than raising
# "ValueError: Cannot run multiple SparkContexts at once".
# NOTE: if a context already exists, it is returned as-is and `conf` is not
# re-applied; restart the kernel (or call sc.stop()) to change the settings.
sc = SparkContext.getOrCreate(conf=conf)

Download 1OHR structure

In [2]:
# PDB IDs to fetch; this demo works on the single entry 1OHR.
pdb_ids = ['1OHR']

# Download the MMTF data for those IDs via the active SparkContext `sc`.
pdb = mmtfReader.download_mmtf_files(pdb_ids, sc)

Find interactions of small molecules (except water)

In [3]:
# Distance cutoff handed to the interaction filter
# (presumably in Angstroms -- confirm against the mmtfPyspark documentation).
DISTANCE_CUTOFF = 4.0

# Build the filter that defines which interactions are reported.
interactionFilter = InteractionFilter()
interactionFilter.set_distance_cutoff(DISTANCE_CUTOFF)
# ignore water interactions: "HOH" groups are excluded from the query groups
interactionFilter.set_query_groups(False, "HOH")

# Compute the ligand-polymer interactions for the downloaded structure(s).
interactions = InteractionFingerprinter.get_ligand_polymer_interactions(
    pdb, interactionFilter
)

# Convert the result to pandas and preview the first 10 rows.
interactions_pd = interactions.toPandas()
interactions_pd.head(10)
Out[3]:
structureChainId queryLigandId queryLigandNumber queryLigandChainId targetChainId groupNumbers sequenceIndices sequence interactingChains
0 1OHR.A 1UN 201 A A [25, 27, 28, 29, 30, 49, 50, 80, 81, 82, 84] [24, 26, 27, 28, 29, 48, 49, 79, 80, 81, 83] PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM... 2
1 1OHR.B 1UN 201 A B [8, 23, 25, 27, 28, 29, 30, 32, 47, 48, 49, 50... [7, 22, 24, 26, 27, 28, 29, 31, 46, 47, 48, 49... PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKM... 2

Terminate Spark

In [3]:
# Stop the SparkContext now that the analysis is finished.
sc.stop()
In [ ]: