{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Map To List Demo\n",
    "\n",
    "This example shows how to filter PDB structures by the X-ray diffraction experimental method and collect information about each result (PDB ID, resolution, rFree, rWork) into a list.\n",
    "\n",
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark import SparkConf, SparkContext\n",
    "from mmtfPyspark.filters import ExperimentalMethods\n",
    "from mmtfPyspark.io import mmtfReader"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configure Spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "conf = SparkConf().setMaster(\"local[*]\").setAppName(\"MapToListDemo\")\n",
    "\n",
    "# getOrCreate reuses a context that is already running in this kernel,\n",
    "# so re-running this cell does not fail with \"Cannot run multiple SparkContexts at once\".\n",
    "sc = SparkContext.getOrCreate(conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read in MMTF Files and sample a small fraction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"../../resources/mmtf_full_sample/\"\n",
    "fraction = 0.001\n",
    "seed = 123  # fixed seed so the sampled subset is the same on every run\n",
    "\n",
    "pdb_sample = mmtfReader.read_sequence_file(path, sc, fraction=fraction, seed=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filter by X-Ray Diffraction experimental method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the sampled data under its own name so this cell can be re-run independently.\n",
    "pdb_xray = pdb_sample.filter(ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Map results to a list of information, and display the collected lists"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['4DOI', 1.5499999523162842, 0.20280000567436218, 0.18170000612735748],\n",
       " ['2QNI', 1.7999999523162842, 0.22066999971866608, 0.19931000471115112],\n",
       " ['3RNQ', 1.600000023841858, 0.20679999887943268, 0.1826999932527542],\n",
       " ['2W6A', 1.399999976158142, 0.19900000095367432, 0.16099999845027924],\n",
       " ['4QKW', 1.7000000476837158, 0.24490000307559967, 0.21230000257492065],\n",
       " ['4CVO', 1.850000023841858, 0.25529998540878296, 0.23639999330043793],\n",
       " ['2V0Z', 2.200000047683716, 0.26600000262260437, 0.20499999821186066],\n",
       " ['1MNN', 1.399999976158142, 0.20635999739170074, 0.19483999907970428],\n",
       " ['3CLO', 2.0399999618530273, 0.21699999272823334, 0.18700000643730164],\n",
       " ['1GVP', 1.600000023841858, 0.2879999876022339, 0.20900000631809235]]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Each record is indexed as t[0] (the ID shown first in every output row)\n",
    "# and t[1] (the object carrying resolution, r_free and r_work).\n",
    "pdb_xray.map(lambda t: [t[0], t[1].resolution, t[1].r_free, t[1].r_work]).collect()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Terminate Spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}