{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Example of using mmtfPyspark to find water interactions\n",
    "\n",
    "This notebook extracts interactions of water molecules (`HOH`) from a sample of PDB structures and keeps those with at least one organic-ligand interaction and at least one protein interaction."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports and Spark configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import py3Dmol\n",
    "from pyspark import SparkConf, SparkContext\n",
    "from pyspark.sql.functions import col\n",
    "\n",
    "from mmtfPyspark.filters import ContainsLProteinChain, Resolution\n",
    "from mmtfPyspark.interactions import InteractionFilter, GroupInteractionExtractor, ExcludedLigandSets\n",
    "from mmtfPyspark.io import mmtfReader\n",
    "\n",
    "# Spark application name and directory handed to mmtfReader below\n",
    "APP_NAME = \"MMTF_Spark\"\n",
    "path = \"../../resources/mmtf_full_sample/\"\n",
    "\n",
    "# Configure Spark with the local[*] master.\n",
    "# getOrCreate() reuses an already running context, so this cell can be\n",
    "# re-executed without raising \"Cannot run multiple SparkContexts at once\".\n",
    "conf = SparkConf().setAppName(APP_NAME).setMaster(\"local[*]\")\n",
    "sc = SparkContext.getOrCreate(conf=conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input parameters for the analysis\n",
    "resolution = 2.0        # maximum resolution accepted by the Resolution filter\n",
    "minInteractions = 2     # minimum number of interactions per query water\n",
    "maxInteractions = 4     # maximum number of interactions per query water\n",
    "distanceCutoff = 3.0    # distance cutoff for an interaction\n",
    "bFactorCutoff = 1.645   # normalized B-factor cutoff\n",
    "includeWaters = True    # if False, HOH is added to the prohibited target groups"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read PDB and filter by resolution and only include proteins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the MMTF sequence files, then apply the resolution limit configured\n",
    "# above and the L-protein-chain filter in exclusive mode.\n",
    "pdb = (\n",
    "    mmtfReader.read_sequence_file(path, sc)\n",
    "    .filter(Resolution(minResolution=0.0, maxResolution=resolution))\n",
    "    .filter(ContainsLProteinChain(exclusive=True))\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup criteria for water interactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "interactions_filter = InteractionFilter()\n",
"# Thresholds come from the parameters set in the \"Define Variables\" cell\n",
    "interactions_filter.set_distance_cutoff(distanceCutoff)\n",
    "interactions_filter.set_normalized_b_factor_cutoff(bFactorCutoff)\n",
    "interactions_filter.set_min_interactions(minInteractions)\n",
    "interactions_filter.set_max_interactions(maxInteractions)\n",
    "interactions_filter.set_query_groups(True, [\"HOH\"])\n",
    "interactions_filter.set_query_elements(True, \"O\")  # Only use water oxygen\n",
    "interactions_filter.set_target_elements(True, [\"O\", \"N\", \"S\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exclude \"uninteresting\" ligands"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on a copy: adding \"HOH\" directly to ExcludedLigandSets.ALL_GROUPS would\n",
    "# modify the shared library constant, so a later run with includeWaters=True\n",
    "# would still exclude water.\n",
    "# NOTE(review): assumes ALL_GROUPS is a set (the original called .add on it).\n",
    "prohibitedGroups = set(ExcludedLigandSets.ALL_GROUPS)\n",
    "if not includeWaters:\n",
    "    prohibitedGroups.add(\"HOH\")\n",
    "interactions_filter.set_prohibited_target_groups(prohibitedGroups)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Calculate interactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the interactions of the filtered structures using the criteria above\n",
    "data = GroupInteractionExtractor().get_interactions(structures=pdb, interactionFilter=interactions_filter)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define Filter Bridging Water Interactions Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_bridging_water_interactions(data, maxInteractions):\n",
    "    \"\"\"Keep rows that have at least one \"LGO\" and at least one \"PRO\" type.\n",
    "\n",
    "    A row is kept only if at least one of its ``type1`` .. ``typeN`` columns\n",
    "    equals \"LGO\" and at least one of them equals \"PRO\", where N is\n",
    "    ``maxInteractions``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : DataFrame\n",
    "        Interaction table with columns ``type1`` .. ``typeN``; only its\n",
    "        ``filter`` method is used.\n",
    "    maxInteractions : int\n",
    "        Number of ``type`` columns to inspect; must be 2, 3 or 4.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    DataFrame\n",
    "        The result of applying both filters to ``data``.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``maxInteractions`` is not 2, 3 or 4.\n",
    "    \"\"\"\n",
    "    if maxInteractions not in (2, 3, 4):\n",
    "        # The previous message only mentioned \"> 4\", which was misleading for\n",
    "        # values below 2; report the accepted range and the offending value.\n",
    "        raise ValueError(\n",
    "            f\"maxInteractions must be 2, 3 or 4 (got {maxInteractions!r}); \"\n",
    "            \"maxInteractions > 4 are not supported yet\"\n",
    "        )\n",
    "\n",
    "    type_columns = [col(f\"type{i}\") for i in range(1, int(maxInteractions) + 1)]\n",
    "\n",
    "    # One filter per required label: the first keeps rows with any \"LGO\" type,\n",
    "    # the second keeps rows with any \"PRO\" type (presumably organic ligand and\n",
    "    # protein, per the section heading that follows this cell).\n",
    "    for label in (\"LGO\", \"PRO\"):\n",
    "        has_label = type_columns[0] == label\n",
    "        for type_column in type_columns[1:]:\n",
    "            has_label = has_label | (type_column == label)\n",
    "        data = data.filter(has_label)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Keep only interactions with at least one organic ligand and one protein interaction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hits(all): 25050\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pdbIdpolyChainsq3q4q5q6atom0element0group0groupNum0...type4chain4nbFactor4distance4angle1-2angle1-3angle1-4angle2-3angle2-4angle3-4
05VNX20.917888NaNNoneNoneOOHOH802...NoneNoneNaN0.0000001.8082432.4428522.0320560.000000NaNNaN
15VNX20.923362NaNNoneNoneOOHOH720...NoneNoneNaN0.0000001.9995102.4447441.8377540.000000NaNNaN
25VNX10.9812360.859927NoneNoneOOHOH832...WATA0.1074152.8352262.2017671.8999491.9553271.8689611.9870201.355446
35VNX20.913232NaNNoneNoneOOHOH753...NoneNoneNaN0.0000002.4806311.9560031.8447490.000000NaNNaN
44X9D1NaNNaNNoneNoneOOHOH233...NoneNoneNaN0.0000001.821897NaNNaNNaNNaNNaN
55H9N10.997502NaNNoneNoneOOHOH341...NoneNoneNaN0.0000002.0368272.0992682.1439450.000000NaNNaN
65H9N10.694474NaNNoneNoneOOHOH349...NoneNoneNaN0.0000002.3972841.3923142.2677340.000000NaNNaN
75LCA10.920665NaNNoneNoneOOHOH621...NoneNoneNaN0.0000001.7369142.2965262.1031820.000000NaNNaN
85LCA30.9575950.839895NoneNoneOOHOH633...PROA-0.6775932.8746401.9897872.0610191.5973581.8190182.4301541.556288
95LCF30.9775550.801750NoneNoneOOHOH634...PROA-0.0908812.7928262.4657411.9898511.9713291.4507821.5662211.943373
105LCF10.916752NaNNoneNoneOOHOH665...NoneNoneNaN0.0000002.0789051.7597372.3734340.000000NaNNaN
115LCH30.9719020.788855NoneNoneOOHOH648...LGOA0.0794712.6832382.0092601.6089321.9111902.4788631.9541191.394372
125LE130.9337100.769073NoneNoneOOHOH649...PROA0.5972682.7438882.3968442.1976421.9763041.4120551.5523281.759869
135MMN30.8055790.840680NoneNoneOOHOH424...PROA-0.1913792.6600591.8351541.5479622.3376031.5301422.0670271.857040
145MMN20.857262NaNNoneNoneOOHOH473...NoneNoneNaN0.0000001.8924281.8908191.6447180.000000NaNNaN
155MMO30.7946420.836989NoneNoneOOHOH417...PROA-0.3859452.6282431.8411651.5381252.3141251.5298922.1018351.814811
165MMO20.817830NaNNoneNoneOOHOH462...NoneNoneNaN0.0000001.9190411.8232021.5902120.000000NaNNaN
175MMO1NaNNaNNoneNoneOOHOH472...NoneNoneNaN0.0000002.270356NaNNaNNaNNaNNaN
185N4910.923594NaNNoneNoneOOHOH810...NoneNoneNaN0.0000002.1501821.7165592.0099100.000000NaNNaN
195UEW10.911381NaNNoneNoneOOHOH611...NoneNoneNaN0.0000001.7170242.1029651.9052250.000000NaNNaN
205UEW10.904136NaNNoneNoneOOHOH602...NoneNoneNaN0.0000001.6910511.9444442.1791450.000000NaNNaN
215UEZ10.939357NaNNoneNoneOOHOH607...NoneNoneNaN0.0000001.9656401.7874362.2237770.000000NaNNaN
225UEZ10.895376NaNNoneNoneOOHOH642...NoneNoneNaN0.0000001.7167112.3917612.1653440.000000NaNNaN
233U9620.886328NaNNoneNoneOOHOH481...NoneNoneNaN0.0000002.1449691.8178511.7081720.000000NaNNaN
243U981NaNNaNNoneNoneOOHOH1043...NoneNoneNaN0.0000002.333080NaNNaNNaNNaNNaN
253U9H10.8503420.863505NoneNoneOOHOH226...PROA0.0321562.7682582.4311111.7149311.6551021.8117432.0354611.652409
263U9N10.966877NaNNoneNoneOOHOH435...NoneNoneNaN0.0000002.0945251.8391432.1567030.000000NaNNaN
273U9N1NaNNaNNoneNoneOOHOH498...NoneNoneNaN0.0000001.831271NaNNaNNaNNaNNaN
283UAG30.8036130.563889NoneNoneOOHOH501...PROA-0.5546932.7930611.1277102.6014181.7379041.5481082.3795341.935428
293UAG20.860623NaNNoneNoneOOHOH502...NoneNoneNaN0.0000002.2739101.6252901.9234010.000000NaNNaN
303UAG20.123106NaNNoneNoneOOHOH600...NoneNoneNaN0.0000001.3601622.4087051.0630540.000000NaNNaN
313UAG10.2035240.364127NoneNoneOOHOH837...LGOA-0.5621742.7778431.4625542.4368852.0618692.2592922.5348060.862076
323UAL1NaNNaNNoneNoneOOHOH258...NoneNoneNaN0.0000002.087371NaNNaNNaNNaNNaN
333UAL10.888170NaNNoneNoneOOHOH293...NoneNoneNaN0.0000002.1610191.6665362.3052440.000000NaNNaN
343UAZ10.921715NaNNoneNoneOOHOH262...NoneNoneNaN0.0000002.2906872.2210301.7547880.000000NaNNaN
353UB610.8991680.672460NoneNoneOOHOH407...PROA-0.4759942.8634471.3112761.7408302.4323272.2513881.4567392.101638
363UB610.9014120.670772NoneNoneOOHOH403...PROB-0.6538652.8739851.7415371.3118032.4215662.2628112.1096841.452782
373UB710.8965400.692493NoneNoneOOHOH407...PROA-0.4051712.8556741.3292611.7408922.4458192.2264691.4848742.083471
383UB710.8972680.685547NoneNoneOOHOH407...PROB-0.6104872.9027151.7398711.3302802.4403972.2332402.0925971.465206
393UBD1NaNNaNNoneNoneOOHOH530...NoneNoneNaN0.0000002.258912NaNNaNNaNNaNNaN
403UBW10.7136060.820527NoneNoneOOHOH307...WATA-0.6086212.5843182.0068311.4491881.8490122.0640861.6509562.424514
413UBW1NaNNaNNoneNoneOOHOH365...NoneNoneNaN0.0000002.160672NaNNaNNaNNaNNaN
423UD51NaNNaNNoneNoneOOHOH242...NoneNoneNaN0.0000001.420573NaNNaNNaNNaNNaN
433UDE1NaNNaNNoneNoneOOHOH221...NoneNoneNaN0.0000001.464474NaNNaNNaNNaNNaN
443UDH1NaNNaNNoneNoneOOHOH843...NoneNoneNaN0.0000002.213616NaNNaNNaNNaNNaN
453UDP1NaNNaNNoneNoneOOHOH821...NoneNoneNaN0.0000002.270605NaNNaNNaNNaNNaN
463UDR1NaNNaNNoneNoneOOHOH684...NoneNoneNaN0.0000002.133965NaNNaNNaNNaNNaN
473UDV1NaNNaNNoneNoneOOHOH263...NoneNoneNaN0.0000002.302813NaNNaNNaNNaNNaN
483UDY1NaNNaNNoneNoneOOHOH654...NoneNoneNaN0.0000001.924380NaNNaNNaNNaNNaN
493UEQ20.860294NaNNoneNoneOOHOH808...NoneNoneNaN0.0000001.6648702.4684952.0811990.000000NaNNaN
\n", "

50 rows × 51 columns

\n", "
" ], "text/plain": [ " pdbId polyChains q3 q4 q5 q6 atom0 element0 group0 \\\n", "0 5VNX 2 0.917888 NaN None None O O HOH \n", "1 5VNX 2 0.923362 NaN None None O O HOH \n", "2 5VNX 1 0.981236 0.859927 None None O O HOH \n", "3 5VNX 2 0.913232 NaN None None O O HOH \n", "4 4X9D 1 NaN NaN None None O O HOH \n", "5 5H9N 1 0.997502 NaN None None O O HOH \n", "6 5H9N 1 0.694474 NaN None None O O HOH \n", "7 5LCA 1 0.920665 NaN None None O O HOH \n", "8 5LCA 3 0.957595 0.839895 None None O O HOH \n", "9 5LCF 3 0.977555 0.801750 None None O O HOH \n", "10 5LCF 1 0.916752 NaN None None O O HOH \n", "11 5LCH 3 0.971902 0.788855 None None O O HOH \n", "12 5LE1 3 0.933710 0.769073 None None O O HOH \n", "13 5MMN 3 0.805579 0.840680 None None O O HOH \n", "14 5MMN 2 0.857262 NaN None None O O HOH \n", "15 5MMO 3 0.794642 0.836989 None None O O HOH \n", "16 5MMO 2 0.817830 NaN None None O O HOH \n", "17 5MMO 1 NaN NaN None None O O HOH \n", "18 5N49 1 0.923594 NaN None None O O HOH \n", "19 5UEW 1 0.911381 NaN None None O O HOH \n", "20 5UEW 1 0.904136 NaN None None O O HOH \n", "21 5UEZ 1 0.939357 NaN None None O O HOH \n", "22 5UEZ 1 0.895376 NaN None None O O HOH \n", "23 3U96 2 0.886328 NaN None None O O HOH \n", "24 3U98 1 NaN NaN None None O O HOH \n", "25 3U9H 1 0.850342 0.863505 None None O O HOH \n", "26 3U9N 1 0.966877 NaN None None O O HOH \n", "27 3U9N 1 NaN NaN None None O O HOH \n", "28 3UAG 3 0.803613 0.563889 None None O O HOH \n", "29 3UAG 2 0.860623 NaN None None O O HOH \n", "30 3UAG 2 0.123106 NaN None None O O HOH \n", "31 3UAG 1 0.203524 0.364127 None None O O HOH \n", "32 3UAL 1 NaN NaN None None O O HOH \n", "33 3UAL 1 0.888170 NaN None None O O HOH \n", "34 3UAZ 1 0.921715 NaN None None O O HOH \n", "35 3UB6 1 0.899168 0.672460 None None O O HOH \n", "36 3UB6 1 0.901412 0.670772 None None O O HOH \n", "37 3UB7 1 0.896540 0.692493 None None O O HOH \n", "38 3UB7 1 0.897268 0.685547 None None O O HOH \n", "39 3UBD 1 NaN NaN None None O O HOH \n", "40 3UBW 
1 0.713606 0.820527 None None O O HOH \n", "41 3UBW 1 NaN NaN None None O O HOH \n", "42 3UD5 1 NaN NaN None None O O HOH \n", "43 3UDE 1 NaN NaN None None O O HOH \n", "44 3UDH 1 NaN NaN None None O O HOH \n", "45 3UDP 1 NaN NaN None None O O HOH \n", "46 3UDR 1 NaN NaN None None O O HOH \n", "47 3UDV 1 NaN NaN None None O O HOH \n", "48 3UDY 1 NaN NaN None None O O HOH \n", "49 3UEQ 2 0.860294 NaN None None O O HOH \n", "\n", " groupNum0 ... type4 chain4 nbFactor4 distance4 angle1-2 angle1-3 \\\n", "0 802 ... None None NaN 0.000000 1.808243 2.442852 \n", "1 720 ... None None NaN 0.000000 1.999510 2.444744 \n", "2 832 ... WAT A 0.107415 2.835226 2.201767 1.899949 \n", "3 753 ... None None NaN 0.000000 2.480631 1.956003 \n", "4 233 ... None None NaN 0.000000 1.821897 NaN \n", "5 341 ... None None NaN 0.000000 2.036827 2.099268 \n", "6 349 ... None None NaN 0.000000 2.397284 1.392314 \n", "7 621 ... None None NaN 0.000000 1.736914 2.296526 \n", "8 633 ... PRO A -0.677593 2.874640 1.989787 2.061019 \n", "9 634 ... PRO A -0.090881 2.792826 2.465741 1.989851 \n", "10 665 ... None None NaN 0.000000 2.078905 1.759737 \n", "11 648 ... LGO A 0.079471 2.683238 2.009260 1.608932 \n", "12 649 ... PRO A 0.597268 2.743888 2.396844 2.197642 \n", "13 424 ... PRO A -0.191379 2.660059 1.835154 1.547962 \n", "14 473 ... None None NaN 0.000000 1.892428 1.890819 \n", "15 417 ... PRO A -0.385945 2.628243 1.841165 1.538125 \n", "16 462 ... None None NaN 0.000000 1.919041 1.823202 \n", "17 472 ... None None NaN 0.000000 2.270356 NaN \n", "18 810 ... None None NaN 0.000000 2.150182 1.716559 \n", "19 611 ... None None NaN 0.000000 1.717024 2.102965 \n", "20 602 ... None None NaN 0.000000 1.691051 1.944444 \n", "21 607 ... None None NaN 0.000000 1.965640 1.787436 \n", "22 642 ... None None NaN 0.000000 1.716711 2.391761 \n", "23 481 ... None None NaN 0.000000 2.144969 1.817851 \n", "24 1043 ... None None NaN 0.000000 2.333080 NaN \n", "25 226 ... 
PRO A 0.032156 2.768258 2.431111 1.714931 \n", "26 435 ... None None NaN 0.000000 2.094525 1.839143 \n", "27 498 ... None None NaN 0.000000 1.831271 NaN \n", "28 501 ... PRO A -0.554693 2.793061 1.127710 2.601418 \n", "29 502 ... None None NaN 0.000000 2.273910 1.625290 \n", "30 600 ... None None NaN 0.000000 1.360162 2.408705 \n", "31 837 ... LGO A -0.562174 2.777843 1.462554 2.436885 \n", "32 258 ... None None NaN 0.000000 2.087371 NaN \n", "33 293 ... None None NaN 0.000000 2.161019 1.666536 \n", "34 262 ... None None NaN 0.000000 2.290687 2.221030 \n", "35 407 ... PRO A -0.475994 2.863447 1.311276 1.740830 \n", "36 403 ... PRO B -0.653865 2.873985 1.741537 1.311803 \n", "37 407 ... PRO A -0.405171 2.855674 1.329261 1.740892 \n", "38 407 ... PRO B -0.610487 2.902715 1.739871 1.330280 \n", "39 530 ... None None NaN 0.000000 2.258912 NaN \n", "40 307 ... WAT A -0.608621 2.584318 2.006831 1.449188 \n", "41 365 ... None None NaN 0.000000 2.160672 NaN \n", "42 242 ... None None NaN 0.000000 1.420573 NaN \n", "43 221 ... None None NaN 0.000000 1.464474 NaN \n", "44 843 ... None None NaN 0.000000 2.213616 NaN \n", "45 821 ... None None NaN 0.000000 2.270605 NaN \n", "46 684 ... None None NaN 0.000000 2.133965 NaN \n", "47 263 ... None None NaN 0.000000 2.302813 NaN \n", "48 654 ... None None NaN 0.000000 1.924380 NaN \n", "49 808 ... 
None None NaN 0.000000 1.664870 2.468495 \n", "\n", " angle1-4 angle2-3 angle2-4 angle3-4 \n", "0 2.032056 0.000000 NaN NaN \n", "1 1.837754 0.000000 NaN NaN \n", "2 1.955327 1.868961 1.987020 1.355446 \n", "3 1.844749 0.000000 NaN NaN \n", "4 NaN NaN NaN NaN \n", "5 2.143945 0.000000 NaN NaN \n", "6 2.267734 0.000000 NaN NaN \n", "7 2.103182 0.000000 NaN NaN \n", "8 1.597358 1.819018 2.430154 1.556288 \n", "9 1.971329 1.450782 1.566221 1.943373 \n", "10 2.373434 0.000000 NaN NaN \n", "11 1.911190 2.478863 1.954119 1.394372 \n", "12 1.976304 1.412055 1.552328 1.759869 \n", "13 2.337603 1.530142 2.067027 1.857040 \n", "14 1.644718 0.000000 NaN NaN \n", "15 2.314125 1.529892 2.101835 1.814811 \n", "16 1.590212 0.000000 NaN NaN \n", "17 NaN NaN NaN NaN \n", "18 2.009910 0.000000 NaN NaN \n", "19 1.905225 0.000000 NaN NaN \n", "20 2.179145 0.000000 NaN NaN \n", "21 2.223777 0.000000 NaN NaN \n", "22 2.165344 0.000000 NaN NaN \n", "23 1.708172 0.000000 NaN NaN \n", "24 NaN NaN NaN NaN \n", "25 1.655102 1.811743 2.035461 1.652409 \n", "26 2.156703 0.000000 NaN NaN \n", "27 NaN NaN NaN NaN \n", "28 1.737904 1.548108 2.379534 1.935428 \n", "29 1.923401 0.000000 NaN NaN \n", "30 1.063054 0.000000 NaN NaN \n", "31 2.061869 2.259292 2.534806 0.862076 \n", "32 NaN NaN NaN NaN \n", "33 2.305244 0.000000 NaN NaN \n", "34 1.754788 0.000000 NaN NaN \n", "35 2.432327 2.251388 1.456739 2.101638 \n", "36 2.421566 2.262811 2.109684 1.452782 \n", "37 2.445819 2.226469 1.484874 2.083471 \n", "38 2.440397 2.233240 2.092597 1.465206 \n", "39 NaN NaN NaN NaN \n", "40 1.849012 2.064086 1.650956 2.424514 \n", "41 NaN NaN NaN NaN \n", "42 NaN NaN NaN NaN \n", "43 NaN NaN NaN NaN \n", "44 NaN NaN NaN NaN \n", "45 NaN NaN NaN NaN \n", "46 NaN NaN NaN NaN \n", "47 NaN NaN NaN NaN \n", "48 NaN NaN NaN NaN \n", "49 2.081199 0.000000 NaN NaN \n", "\n", "[50 rows x 51 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = 
filter_bridging_water_interactions(data, maxInteractions=maxInteractions).cache()\n",
    "\n",
    "# Report how many rows remain after the bridging-water filter\n",
    "print(f\"Hits(all): {data.count()}\")\n",
    "\n",
    "# Keep the Spark DataFrame bound to `data` and give the pandas copy its own\n",
    "# name, so re-running this cell does not call Spark methods on a pandas frame.\n",
    "data_pd = data.toPandas()\n",
    "data_pd.head(50)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Terminate Spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}