Secondary Structure Shifted Word2Vec Encoder

This demo creates a dataset of sequence segments derived from a non-redundant set. The dataset contains the sequence segment, the DSSP Q8 and DSSP Q3 code of the center residue in a sequence segment, and a 3-gram shifted Word2Vec encoding of the sequence segment.

Imports

In [1]:
from pyspark import SparkConf, SparkContext, SQLContext
from mmtfPyspark.ml import ProteinSequenceEncoder
from mmtfPyspark.mappers import StructureToPolymerChains
from mmtfPyspark.filters import ContainsLProteinChain
from mmtfPyspark.datasets import secondaryStructureSegmentExtractor
from mmtfPyspark.webfilters import Pisces
from mmtfPyspark.io import mmtfReader
import time

Configure Spark Context

In [2]:
# Build the Spark configuration step by step: local master "local[*]" and an
# application name identifying this demo.
conf = SparkConf()
conf.setMaster("local[*]")
conf.setAppName("secondaryStructureShiftedWord2VecEncodeDemo")

# The Spark context used by the reader in the following cells.
sc = SparkContext(conf=conf)

Read in, filter and sample Hadoop Sequence Files

In [3]:
# Directory holding the sample of reduced MMTF Hadoop sequence files.
path = "../../resources/mmtf_reduced_sample/"

# Thresholds handed to the Pisces filter.
# NOTE(review): presumably percent sequence identity and resolution in
# Angstrom -- confirm against the Pisces filter documentation.
sequenceIdentity = 20
resolution = 2.0

# Sampling parameters: fraction of chains to keep and the random seed.
fraction = 0.1
seed = 123

# Read the structures, split them into polymer chains, apply the Pisces and
# L-protein-chain filters, then sample the result.
# NOTE(review): the leading False in sample() is presumably `withReplacement`
# (as in RDD.sample) -- confirm that the reader returns an RDD.
pdb = (
    mmtfReader.read_sequence_file(path, sc)
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity, resolution))
    .filter(ContainsLProteinChain())
    .sample(False, fraction, seed)
)

Extract Secondary Structure Segments

In [4]:
# Length of each extracted sequence segment.
segmentLength = 25

# Build the secondary structure segment dataset from the sampled chains, then
# cache it because it is reused by the encoding step.
data = secondaryStructureSegmentExtractor.get_dataset(pdb, segmentLength)
data = data.cache()

Add Word2Vec encoded feature vector

In [6]:
encoder = ProteinSequenceEncoder(data)

# Size of each Word2Vec feature vector.
vectorSize = 50
# (segmentLength - 1) // 2, i.e. 12 for the 25-residue segments used here.
windowSize = (segmentLength - 1) // 2

# shifted_3gram_word2vec_encode is called with keyword arguments; the encoded
# dataset replaces `data` and is cached.
data = encoder.shifted_3gram_word2vec_encode(
    windowSize=windowSize,
    vectorSize=vectorSize,
).cache()
root
 |-- ngram0: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ngram1: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ngram2: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- structureChainId: string (nullable = false)
 |-- sequence: string (nullable = false)
 |-- labelQ8: string (nullable = false)
 |-- labelQ3: string (nullable = false)
 |-- feature2: vector (nullable = true)
 |-- feature1: vector (nullable = true)
 |-- feature0: vector (nullable = true)
 |-- features: vector (nullable = true)

Show dataset schema and a few rows of data

In [7]:
# Print the column layout of the encoded dataset.
data.printSchema()
# Display the first 10 rows without truncating long cell values.
data.show(10, truncate=False)
root
 |-- structureChainId: string (nullable = false)
 |-- sequence: string (nullable = false)
 |-- labelQ8: string (nullable = false)
 |-- labelQ3: string (nullable = false)
 |-- ngram0: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ngram1: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- ngram2: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- feature0: vector (nullable = true)
 |-- feature1: vector (nullable = true)
 |-- feature2: vector (nullable = true)
 |-- features: vector (nullable = true)

+----------------+-------------------------+-------+-------+----------------------------------------+----------------------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|structureChainId|sequence                 |labelQ8|labelQ3|ngram0                                  |ngram1                                  |ngram2                                 |feature0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |feature1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
                                                                                                                                                                |feature2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |features                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
                                                                                                                                                           |
+----------------+-------------------------+-------+-------+----------------------------------------+----------------------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------+
|1B8D.K          |ACFAKYSYLKNAGEAGDSPEKINKC|T      |C      |[ACF, AKY, SYL, KNA, GEA, GDS, PEK, INK]|[CFA, KYS, YLK, NAG, EAG, DSP, EKI, NKC]|[FAK, YSY, LKN, AGE, AGD, SPE, KIN, KC]|[-2.5167602449655533,0.41110057570040226,-0.9742291159927845,-2.8470662012696266,0.9648916684091091,1.0285223391838372,0.001150625292211771,1.2885766252875328,-0.09930689074099064,1.407139576971531,-0.3486442621797323,-0.7173879102338105,-1.1011961586773396,-1.1487109400331974,0.5706137120723724,1.575491147581488,0.8980174884200096,1.4153222925961018,-3.7715277671813965,3.2217957973480225,1.4599277088418603,-0.0911189615726471,3.4717509746551514,0.8062625145539641,-0.33254433795809746,-0.11376025527715683,0.13917105086147785,0.04604405933059752,-1.8407416604459286,-0.9247598350048065,2.1728615909814835,-0.39766245940700173,-2.104358658194542,0.2887059743516147,0.9250973165035248,1.42127257399261,2.1061294823884964,1.0034770676866174,-1.0981312915682793,-0.26306624710559845,2.402161940932274,0.973731255158782,-0.3186478989664465,-1.4835131987929344,-3.4183479249477386,1.5926773101091385,0.6395504199899733,-1.0964065743610263,-0.35471800714731216,1.0176744535565376]                
|[-0.9313944121822715,0.8716795854270458,-2.840606167912483,-0.10387025400996208,1.6138809770345688,0.15769858483690768,1.2219947800040245,0.5822487422265112,0.5221223346889019,-0.38784077391028404,-1.6556438207626343,1.4649942889809608,0.6820312328636646,-1.1681912392377853,1.2649499271064997,0.827448139898479,0.6651678285561502,-0.8827157318592072,0.005972708109766245,-0.036749313585460186,-0.40281209722161293,0.8575221933424473,1.0337270461022854,-0.4540816433727741,-0.4002405386418104,4.086934179067612,-1.7742436937987804,-0.4067212697118521,-1.5438722493126988,-2.3720456808805466,1.4653990715742111,-0.5951968412846327,-3.604078695178032,-0.6127490177750587,-0.0032192599028348923,-1.4219077564775944,2.6555567365139723,1.3034075051546097,0.6668211445212364,-0.7932155653834343,-0.45656544901430607,-1.6704068705439568,1.5382748022675514,-0.0953745199367404,0.11393586546182632,2.519601780921221,-1.8591778986155987,1.0615660399198532,1.005025876685977,1.7040398325771093]|[-0.3790398994460702,2.511866755783558,-3.5664734542369843,-1.2993557080626488,2.9532619267702103,1.0406750477850437,2.015357533469796,-0.8009799160063267,-0.6751335377339274,-0.3021529670804739,-0.8251077253371477,-1.3513113409280777,-0.2959672808647156,-1.339887797832489,1.3141291439533234,0.4225901812314987,0.11309268325567245,1.6759792119264603,-2.9049714356660843,1.2852151952683926,1.8867864608764648,-1.715254195034504,3.6069695949554443,2.0801507234573364,0.7089274059981108,1.8859793469309807,1.045213669538498,-0.6357348766177893,-2.355926785618067,-0.15536213107407093,2.74931438267231,1.5573025941848755,-1.3923211228102446,-1.414733212441206,1.5341008454561234,0.23730647563934326,-1.3458229769021273,3.168752908706665,-1.861640028655529,-0.9484710758551955,1.198159808292985,1.7480188980698586,1.1749723590910435,-0.4304179302416742,-1.3185899294912815,0.6060928218066692,0.2999028731137514,-1.1897292463108897,-1.4133381769061089,0.48606223426759243]                      
|[-1.2757315188646317,1.264882305637002,-2.460436246047417,-1.4167640544474125,1.844011524071296,0.7422986572685962,1.0795009795886774,0.3566151505025725,-0.08410603126200537,0.23904861199359098,-0.9431319360931715,-0.20123498739364246,-0.23837740222613016,-1.2189299923678238,1.0498975943773985,0.9418431562371552,0.5587593334106108,0.7361952575544516,-2.223508831579238,1.4900872263436515,0.9813006908322374,-0.3162836544215679,2.704149205237627,0.8107771982128421,-0.007952490200599035,1.9530510902404785,-0.19661965779960155,-0.3321373623330146,-1.9135135651255648,-1.1507225489864747,2.129191681742668,0.18814776449774703,-2.3669194920609393,-0.5795920852882167,0.8186596340189377,0.07889043105145295,1.1386210806667805,1.8252124938492973,-0.7643167252341906,-0.6682509627814094,1.0479187667369843,0.3504477608948946,0.7981997541307161,-0.6697685496571163,-1.541000662992398,1.5727906376123428,-0.306574868503958,-0.4081899269173543,-0.25434343578914803,1.0692588401337464]                           |
|1AOL.A          |CNTAWNRLKLDQVTHKSSEGFYVCP|H      |H      |[CNT, AWN, RLK, LDQ, VTH, KSS, EGF, YVC]|[NTA, WNR, LKL, DQV, THK, SSE, GFY, VCP]|[TAW, NRL, KLD, QVT, HKS, SEG, FYV, CP]|[-0.8749945722520351,-2.3850620687007904,-0.9591096360236406,1.9488045200705528,-0.16176823899149895,-2.361344341188669,1.5523064099252224,1.7677657306194305,2.5747810751199722,2.2461508214473724,1.360827692784369,-1.119172306265682,1.403134043328464,1.2144876532256603,-1.184457833878696,-1.2881279587745667,-1.9241969138383865,-1.7829678133130074,-1.3400870864279568,2.0928035527467728,3.193344622850418,-1.7700832376722246,-3.000414788722992,1.2379086762666702,0.6209115190431476,1.7831069082021713,0.9801567485556006,0.6748019102960825,1.1705026775598526,-1.39455908536911,-3.7340111434459686,-1.6399068981409073,1.002204678952694,1.3039009934291244,-0.8630187809467316,0.7595242718234658,-0.6134175658226013,-0.6766404123045504,1.9464991856366396,-0.24298349767923355,-0.26678529288619757,-0.10621064295992255,-0.1937673930078745,2.2555521205067635,-1.7164401039481163,2.0487791039049625,-0.31262913905084133,1.231461577117443,-0.07170518022030592,0.4120531603693962]                      
|[-1.5751862302422523,2.3965483605861664,-1.9082001000642776,0.7708999700844288,1.6688142605125904,-0.2091626077890396,2.9943602681159973,1.1393068917095661,0.44232150726020336,-1.5763466972857714,0.5430389028042555,-3.0842064321041107,1.216230109333992,1.3201781548559666,-0.7373930085450411,0.6800840348005295,3.181833803653717,1.881282925605774,-1.6091054677963257,1.3585318215191364,1.134154349565506,-2.8635005056858063,3.0916261076927185,0.3162462189793587,-1.375136412680149,0.4933837908320129,0.6565325153060257,0.8242096453905106,-0.3088548630475998,0.06764748692512512,-0.7074224315583706,2.2043756283819675,1.7672868445515633,-0.4557509422302246,0.8224408300593495,-0.3385403286665678,-1.1575689287856221,1.19766366481781,-2.7077700197696686,2.279412090778351,0.7209427170455456,1.2737916007172316,0.4409019276499748,-2.228353038430214,-0.7511747404932976,-2.1272102743387222,2.066150538623333,-0.1671935305930674,2.10979013890028,0.3388356864452362]                       |[-0.9588506408035755,-0.6263859607279301,3.711090862751007,0.35937364399433136,0.6883916184306145,-2.0693296045064926,-0.10366672649979591,-0.0980080240406096,0.10416990146040916,0.21229935251176357,0.5808655018918216,1.2410561293363571,-1.613322782330215,-1.5077514415606856,1.0391174778342247,2.272442638874054,1.4221012579509988,1.598120205104351,0.24927867949008942,0.28526682406663895,-2.9150954633951187,0.923704132437706,-0.873876579105854,-2.2037372142076492,0.18408258771523833,0.4218266196548939,-2.9863907396793365,0.7966075614094734,0.27925676479935646,1.2721479684114456,-0.17140691727399826,-1.0450899302959442,-1.9417413622140884,0.2902057711035013,-1.7802518904209137,0.7011928996071219,2.429913818836212,-2.00313681922853,-1.7666854783892632,-1.146609790623188,1.8424944058060646,0.9883973971009254,1.1385902725160122,-0.0074856821447610855,0.8175747208297253,-2.2164763063192368,0.41440874710679054,0.39456793665885925,3.6405158042907715,0.6161672435700893]              
|[-1.136343814432621,-0.20496655628085136,0.28126037555436295,1.026359378049771,0.7318125466505686,-1.5466121844947338,1.4809999838471413,0.9363548660961291,1.040424161280195,0.2940344922244549,0.828244032493482,-0.9874408696778119,0.33534712344408035,0.34230478884031373,-0.2942444548631708,0.5547995716333389,0.8932460492554432,0.5654784391323725,-0.8999712915780643,1.2455340661108494,0.4708011696736018,-1.2366265369734417,-0.2608884200453758,-0.21652743965387344,-0.1900474353072544,0.8994391062296927,-0.4499004919392367,0.7652063723653555,0.3803015264372031,-0.01825454334417979,-1.5376134974261124,-0.16020706668496132,0.2759167204300563,0.379451940767467,-0.6069432804360986,0.3740589475880067,0.21964244140932956,-0.4940378555717568,-0.8426521041740974,0.29660626749197644,0.7655506099884709,0.7186594516194115,0.46190826905270416,0.0065711333105961485,-0.5500133745372295,-0.7649691589176655,0.7226433822264274,0.4862786610610783,1.8928669209902484,0.4556853634615739]                          |
|1FO8.A          |DPSLWCVSAWNDNGKEQMVDSSKPE|T      |C      |[DPS, LWC, VSA, WND, NGK, EQM, VDS, SKP]|[PSL, WCV, SAW, NDN, GKE, QMV, DSS, KPE]|[SLW, CVS, AWN, DNG, KEQ, MVD, SSK, PE]|[0.11070175841450691,-1.486862190067768,-1.5935404784977436,1.5209185928106308,1.21102574467659,-0.24003497511148453,1.4447756111621857,-0.7691714763641357,1.1232565288082696,-0.7248301580548286,-0.4735450241714716,-2.004910096526146,1.7610824033617973,0.7160377092659473,0.08553061820566654,0.0470068184658885,0.07292179111391306,-0.18871041759848595,1.3982071951031685,0.48172979801893234,0.8981019053608179,-2.4158772230148315,0.9278556052595377,-0.03243331424891949,-0.44000305444933474,-0.025180071592330933,3.0448215156793594,-0.1391153922304511,0.381427600979805,-2.953428566455841,-1.73341104388237,1.8045079857110977,2.6773067116737366,0.05714104883372784,1.5919539518654346,-0.8060395112261176,-2.1258633583784103,1.279235191643238,0.7501037400215864,-0.1689485206734389,-0.5209691589698195,-1.3916213102638721,1.2358281426131725,-1.078914562240243,0.08453352469950914,-1.213794432580471,0.46890437975525856,0.37695081159472466,-0.16148049384355545,-2.245404824614525]                
|[1.1518815904855728,-0.13194116204977036,3.5109526216983795,0.07906617689877748,-1.8725860863924026,-0.04389195144176483,0.5427215844392776,-0.1332920677959919,-0.9055205285549164,1.7623174972832203,1.5601475834846497,-0.5169037152081728,-2.404917135834694,-0.10797023586928844,0.7702396102249622,-1.1878940872848034,-2.200541988015175,1.7192873433232307,1.4355204533785582,1.9266403764486313,1.4601783603429794,1.5219778884202242,-0.4749095290899277,-0.07191818160936236,2.0843387246131897,-2.9780492782592773,0.35485232900828123,-2.0553689748048782,-0.7884012069553137,1.4135253354907036,-0.8520875480026007,0.8578666374087334,2.2140540778636932,3.0365646183490753,-1.8036518655717373,1.8222705200314522,-1.4469136893749237,-0.587627999484539,0.8960399408824742,0.7449759766459465,-0.9975911248475313,2.0715517699718475,-0.2663208171725273,0.4602264421992004,0.8810718636959791,-0.8022859990596771,2.4971166253089905,0.32739410921931267,0.7479538060724735,1.4046440720558167]      |[-0.0534269493073225,-1.546366237103939,-0.8318886794149876,3.019452378153801,-0.7034096904098988,-1.766436755657196,-1.3649640679359436,2.2501664757728577,0.6676182607188821,0.8691947683691978,1.2327622210141271,0.6872831918299198,0.5752576435916126,0.46088949777185917,-0.36731327418237925,-0.30627243826165795,-0.4867006912827492,-0.04493131674826145,-0.047522529028356075,-0.5728800892829895,1.2162565551698208,-0.7649948559701443,0.587266406044364,-1.5402372106909752,0.45861928444355726,0.0656986478716135,-2.167999416589737,1.0150048211216927,2.2239068150520325,0.012581955641508102,-0.5331162856891751,-1.4638197124004364,1.8225427120923996,0.24284160695970058,-1.7261623367667198,-0.6465383381582797,-0.007603004574775696,-0.18907177820801735,0.06262210384011269,-0.14512521587312222,0.43823713436722755,-0.8162072044797242,-0.3526321332901716,0.20940214861184359,0.2001667134463787,-0.5528057422488928,0.8689652998000383,-0.04899022914469242,1.50699966493994,0.866228424012661]  
|[0.40305213319758576,-1.0550565297404926,0.3618411545952161,1.5398123826210697,-0.4549900107085705,-0.6834545607368151,0.20751104255517325,0.4492343105375767,0.2951180869907451,0.6355607025325298,0.773121593442435,-0.6115102066347996,-0.022859029627094667,0.35631899038950604,0.1628189847494165,-0.4823865690268576,-0.871440296061337,0.4952152029921611,0.9287350398177902,0.611830028394858,1.1915122736245394,-0.5529647301882505,0.34673749407132465,-0.548196235516419,0.7009849848691374,-0.9791769006599983,0.41055814269930124,-0.3931598486378789,0.6056444030255079,-0.5091070917745432,-1.0395382925247152,0.39951830357313156,2.23796783387661,1.1121824247141678,-0.6459534168243408,0.12323089021568497,-1.1934600174427032,0.16751180465022722,0.5695885949147245,0.14363408003312847,-0.3601077164833744,-0.04542558159058293,0.20562506405015787,-0.13642865714306632,0.3885907006139557,-0.856295391296347,1.2783287682880957,0.21845156388978162,0.6978243257229527,0.008489223817984263]                        |
|1FSG.C          |GCCYDFNEMFRDFDHVAVLSDAARK|C      |C      |[GCC, YDF, NEM, FRD, FDH, VAV, LSD, AAR]|[CCY, DFN, EMF, RDF, DHV, AVL, SDA, ARK]|[CYD, FNE, MFR, DFD, HVA, VLS, DAA, RK]|[2.2771085798740387,-1.5183069882914424,-0.5159789621829987,0.4578652177006006,1.2164042368531227,-0.3421694249846041,0.7532360162585974,-1.7057997956871986,-0.029285148717463017,-1.970352802425623,1.1907497867941856,-0.30514839943498373,1.7557942867279053,0.32348300609737635,0.3768870458006859,-2.408259764313698,-0.6834471076726913,-0.7562738675624132,2.0755004063248634,-1.3693122267723083,-0.2666115101892501,-1.3402757085859776,-1.0694497264921665,0.8683559447526932,0.6738018412142992,-0.30020878184586763,2.471928521990776,-0.492443460971117,1.2775334641337395,0.5902089327573776,-0.07173505565151572,0.8327170107513666,1.4225731622427702,-1.6535230362787843,1.4797164499759674,0.5339679643511772,-1.2272072061896324,1.0035167261958122,-0.27838711021468043,-0.847806986887008,-1.8506232500076294,0.6006052754819393,0.6953180599957705,-1.1219000592827797,-0.12937748804688454,-1.332934357225895,-0.30345823720563203,2.5949824303388596,-2.0814936086535454,-0.6839073561131954]            
|[2.024966076016426,0.7575673609972,-1.0574407801032066,-0.5926201809197664,-1.0597501769661903,2.187953472137451,1.3131920620799065,-0.46419759653508663,-2.0138872116804123,-0.5929473964497447,-0.9836027771234512,-1.4559084177017212,-0.08800704404711723,-0.6220664791762829,-0.8641017489135265,-0.022495979443192482,-1.703911691904068,0.8617219999432564,-0.5454287007451057,1.8494813144207,2.1224654372781515,0.9511913694441319,1.9577395021915436,1.9236545264720917,-0.2843426950275898,-1.4805774465203285,0.8744483646005392,-0.03745176177471876,-1.9532251209020615,1.7798036634922028,2.693154980894178,1.648189254105091,-0.5901545137166977,-4.779247224330902,0.7574559599161148,0.871131457388401,-0.5542276501655579,-0.2212514840066433,-0.33308135718107224,1.2095318995416164,-0.5565609857439995,1.8279463201761246,-1.1027900278568268,0.47484637051820755,-1.3460534140467644,0.09413556382060051,-2.53048000857234,-0.43906835466623306,-2.8203126788139343,-0.8671386744827032]        |[1.3174011707305908,-0.8285085475072265,-0.4388067852705717,0.38912047166377306,-0.2934603728353977,0.6762879518792033,-1.1833822429180145,-1.359791338443756,1.653559423983097,-0.42519209161400795,-0.4495155231561512,-0.20669117383658886,0.8953103125095367,0.66706776432693,-0.0563760856166482,-2.2260952591896057,-0.1524185671005398,-2.151533827185631,0.8610069574788213,0.49078185856342316,-0.6602802388370037,0.8252322524785995,-1.7116134390234947,1.0314278975129128,-1.3898665234446526,-1.7423550710082054,1.0970663242042065,0.6584132798016071,0.46167587861418724,0.3903691447339952,-0.04137971065938473,0.6036010719835758,-2.5865439204499125,-2.940976172685623,2.3582600676454604,-1.679496267810464,-1.1360564790666103,-2.236722230911255,0.6139406934380531,-1.7739221584051847,-2.6865654066205025,-0.7237020432949066,-1.0250144805759192,0.10607603844255209,-0.16317578684538603,-0.3314519925042987,-3.397866502404213,-0.0571436220780015,-1.3635811284184456,-2.241725578904152]        
|[1.8731586088736851,-0.5297493916004896,-0.670742175852259,0.08478850281486909,-0.04560210431615511,0.8406906663440168,0.29434861180682975,-1.1765962435553472,-0.12987097880492607,-0.9961640968297919,-0.08078950449513893,-0.6559159969910979,0.8543658517301083,0.1228280970826745,-0.18119692957649627,-1.552283667648832,-0.8465924555590997,-0.6820285649349292,0.7970262210195264,0.3236503154039383,0.3985245627506326,0.14538263777891794,-0.2744412211080392,1.2744794562458992,-0.3334691257526477,-1.1743804331248004,1.4811477369318407,0.042839352351923786,-0.07133859271804492,0.9201272469945252,0.8600134048610926,1.0281691122800112,-0.5847084239746133,-3.12458214443177,1.5318108258458476,-0.09146561535696189,-0.9724971118072668,-0.4848189962406953,8.240753474334875E-4,-0.4707324152501921,-1.6979165474573772,0.5682831841210524,-0.4774954828123252,-0.18032588344067335,-0.546202229646345,-0.523416928636531,-2.0772682493940615,0.6995901511982083,-2.088462471961975,-1.2642572031666834]                |
|1FSG.C          |IDKILLPGGLVKDRVEKLAYDIHRT|H      |H      |[IDK, ILL, PGG, LVK, DRV, EKL, AYD, IHR]|[DKI, LLP, GGL, VKD, RVE, KLA, YDI, HRT]|[KIL, LPG, GLV, KDR, VEK, LAY, DIH, RT]|[0.0011718850582838058,-0.8450929443351924,2.1879174262285233,-0.09608905389904976,0.2545490232296288,-0.7139255590736866,0.06390967592597008,-1.5223259925842285,-0.5340810045599937,0.4789855359122157,1.973416954278946,0.210376787930727,0.5840373197570443,-2.3175208270549774,-0.7240085303783417,0.8457834534347057,-0.9102122336626053,0.33517053350806236,1.9557270407676697,-3.196206033229828,0.9837179183959961,1.0185461640357971,0.3151991742197424,-0.06535956147126853,1.1250180709175766,-1.9910160899162292,0.4132579490542412,0.6632806407287717,0.5989269332494587,0.9299607160501182,-2.1158312633633614,-0.19778327085077763,0.9805412106215954,-0.8899021372199059,-0.5809737797826529,0.8363611288368702,0.940087303519249,0.6162578114308417,-0.04694719659164548,-1.453425221145153,2.7849104553461075,-0.17180505441501737,1.5046942681074142,-2.677586391568184,-0.7771087568253279,-0.8997226096689701,2.5057591795921326,0.6804224327206612,-1.0681501575745642,0.16880337009206414]                
|[0.6603811718523502,-1.688733596354723,0.21004100888967514,2.18814018368721,-0.47515304014086723,0.521423701196909,-1.8298866972327232,-0.805654626339674,1.449854202568531,0.09958846122026443,-1.304968398064375,2.208309479057789,1.0042259711772203,0.3822454698383808,1.2090888749808073,-1.6219857237301767,-0.3760288953781128,-1.1664314344525337,1.980121273547411,-2.8441652804613113,-2.4003348350524902,-0.5027901008725166,-2.2807891741394997,-2.110265366733074,0.679475300014019,1.7616894636303186,1.6926171351224184,-0.3426139794755727,1.7056130170822144,-2.561446189880371,-0.16305002942681313,-0.04584665596485138,-0.8985776104964316,2.073562730103731,-0.39712410420179367,0.8302349895238876,-1.4121216256171465,0.04091126471757889,1.0984010696411133,-0.8626506514847279,-1.0959608554840088,-1.9765258692204952,0.7845079004764557,0.24884821847081184,3.0589908957481384,1.4165362603962421,-0.4622044018469751,-0.5345578007400036,0.8742599261458963,-1.68886236846447]             |[-0.14117270801216364,0.501228928565979,-0.490829162299633,-1.1259897071868181,0.20587251894176006,-0.7100284174084663,0.8953029671683908,0.4784214710816741,0.13393845409154892,0.38389462418854237,1.3787413127720356,0.8969475775957108,-0.7003457536920905,0.5528628174215555,-2.8711675703525543,-0.015255114063620567,-1.608394593000412,-1.3862174078822136,1.6704374174587429,-0.8658035742118955,0.14239560440182686,-0.5241378620266914,0.7512945607304573,0.9038015808910131,0.0425470769405365,0.23507868708111346,-1.4734411388635635,0.7937529268674552,0.6585322692990303,0.13827987015247345,-3.0521865002810955,-1.599974811077118,0.1309700938872993,1.2791875898838043,-1.6839988008141518,0.016399651765823364,0.05483295023441315,1.496060261502862,-0.5365324467420578,0.059273432940244675,-0.5218462813645601,1.846670426428318,1.8020758591592312,-0.2163916165009141,0.4910247940570116,1.5901122018694878,0.6389886133838445,1.6120226830244064,-0.7925170063972473,1.4438992850482464]           
|[0.17346011629949012,-0.6775325373746455,0.6357097576061884,0.3220204742004474,-0.004910499323159456,-0.30084342509508133,-0.29022468471278745,-0.6165197159474095,0.3499038840333621,0.3208228737736742,0.6823966229955355,1.1052112815280755,0.29597251241405803,-0.4608041799316804,-0.7953624085833629,-0.26381912811969715,-0.96487857401371,-0.7391594362755617,1.8687619105912745,-2.302058295967678,-0.4247404374182224,-0.0027939329544703164,-0.4047651463964333,-0.4239411157711099,0.6156801492907107,0.0019173535984009504,0.21081131510436535,0.37147319604021806,0.9876907398769011,-0.49773520122592646,-1.7770225976904233,-0.6145349126309156,0.07097789800415437,0.8209493942558765,-0.8873655615995327,0.5609985900421938,-0.1390671239544948,0.7177431125504276,0.17164047543580332,-0.7522674798965454,0.38903443949917954,-0.10055349906906486,1.3637593425810337,-0.8817099298660954,0.9243023109932741,0.7023086175322533,0.894181130376334,0.5859624383350214,-0.3288024126086384,-0.02538657110805313]           |
|1C1K.A          |KAYRKILNIDSQKAKNVFIETVKSC|H      |H      |[KAY, RKI, LNI, DSQ, KAK, NVF, IET, VKS]|[AYR, KIL, NID, SQK, AKN, VFI, ETV, KSC]|[YRK, ILN, IDS, QKA, KNV, FIE, TVK, SC]|[0.8315972853451967,0.043749348260462284,0.9963656216859818,-1.7556808441877365,1.5407079458236694,0.5786184147000313,0.2913429494947195,-2.5242234766483307,-0.01595531590282917,-0.790187381207943,0.47971696499735117,0.6644763052463531,-0.06552690267562866,-0.18635870632715523,-0.24011939484626055,0.3150351010262966,1.2192784249782562,-1.495406448841095,1.2220869734883308,-1.7729653716087341,-1.29938605427742,0.8938945829868317,-1.0290195047855377,0.4054016247391701,-0.7008511871099472,-0.4426448754966259,-0.13887380436062813,0.4661693535745144,-1.0362043976783752,1.019883632659912,0.009940383024513721,0.29537490755319595,-1.6740040928125381,-0.8497995864599943,1.030547171831131,-1.2693421393632889,1.4273883253335953,-0.9445273503661156,-0.842797540128231,0.03878272045403719,-0.25146413035690784,0.45147474855184555,0.5709333997219801,-1.503302440047264,-0.4454676490277052,-0.27577484026551247,-0.21870895475149155,1.114164985716343,-0.2043338567018509,-0.7566720992326736]         
|[-0.14495163690298796,-1.5973218269646168,1.1002884656190872,-0.9294215068221092,-0.8730706516653299,-2.105044849216938,0.3343663904815912,-1.5587959289550781,-1.6957567781209946,-0.1929185390472412,0.5080687068402767,0.2381153106689453,-0.32131364196538925,0.47839550022035837,-2.2029463946819305,-0.40206904523074627,-1.666477620601654,-1.2508568316698074,2.85426065325737,-1.6087262406945229,0.37689287052489817,-0.6581116076558828,0.7255065347999334,0.5320571474730968,1.0463262051343918,-0.18357564182952046,1.224573753774166,-0.31073305755853653,0.264577254652977,0.5218420438468456,-1.9498009495437145,-1.0226307203993201,1.274782975204289,-1.1397498100996017,-0.5693619083613157,0.24066838063299656,0.6125580742955208,1.0700722485780716,-0.23715126886963844,-0.9029702544212341,1.513337817043066,0.6851513087749481,0.2643822096288204,-0.5900561853777617,-0.2733938228338957,-0.28871870785951614,0.640851978212595,1.3491133116185665,-2.265309253707528,-0.0503608388826251]    |[-0.09335422236472368,-2.975793592631817,0.47707685036584735,-0.07341541349887848,-0.4345288439653814,-0.3944376967847347,-1.1281227767467499,-0.4732705242931843,-1.605573982000351,-0.9254512805491686,-1.4582859873771667,0.6906826351769269,0.47606476210057735,0.2603663057088852,0.6698054596781731,-1.2353470027446747,-0.35411082953214645,-0.4805983491241932,0.12740415427833796,-0.0839422196149826,-0.5413149166852236,0.6453290078788996,-0.7880362402647734,-0.6801928170025349,-0.37101451493799686,0.5396933853626251,3.3346361219882965,-1.048930611461401,0.06973668932914734,-1.0668584555387497,-0.02895064651966095,-0.5435519181191921,0.32841650396585464,-2.022835612297058,2.628590777516365,1.6671877969056368,0.7210907228291035,-0.07554971426725388,0.7018777765333652,-0.58881950750947,-0.16005998849868774,-2.735073670744896,-0.029146071523427963,1.2348634004592896,1.079444631934166,-0.37752725556492805,-2.455977290868759,-0.8030869700014591,-1.9081055503338575,-2.3897109627723694]|[0.19776380869249502,-1.5
097886904453237,0.8579103125569721,-0.9195059215029081,0.07770281673098604,-0.6402880437672138,-0.1674711455901464,-1.518763309965531,-1.1057620253413916,-0.6361857336014509,-0.15683343851317963,0.5310914170307418,0.029741405819853146,0.18413436653402945,-0.5910867766166726,-0.44079364898304146,-0.26710334171851474,-1.0756205432116985,1.4012505936746795,-1.1552112773060799,-0.48793603347924847,0.2937039944032828,-0.3638497367501259,0.08575531840324402,-0.008513165637850761,-0.028842377321173746,1.4734453571339448,-0.2978314384818077,-0.23396348456541696,0.15828907365600267,-0.6562704043462873,-0.4236025769884388,-0.02360153788079818,-1.3374616696188848,1.0299253469953935,0.21283801272511482,0.9203457074860731,0.016665061314900715,-0.1260236774881681,-0.4843356804922223,0.3672712327291568,-0.5328158711393675,0.26872317927579087,-0.28616507498857874,0.12019438669085503,-0.3140069345633189,-0.6779447558025519,0.5533971091111501,-1.4592495535810788,-1.0655813002958894]|
|1A9X.F          |LSSYLKRHNIVAIADIDTRKLTRLL|E      |E      |[LSS, YLK, RHN, IVA, IAD, IDT, RKL, TRL]|[SSY, LKR, HNI, VAI, ADI, DTR, KLT, RLL]|[SYL, KRH, NIV, AIA, DID, TRK, LTR, LL]|[2.108544853515923,2.1560158729553223,-3.1133848782628775,0.5561599703505635,0.7304713167250156,0.6248435862362385,1.3306696601212025,-0.011689445236697793,2.683310717344284,0.7065228053834289,0.7513110712170601,-0.8652135767042637,0.06088611111044884,-0.40404776064679027,0.14235249906778336,-2.002258694730699,-0.47506802063435316,-0.008863645256496966,0.6520567536354065,-0.6912552826106548,2.6441287845373154,0.23053075885400176,-0.23508251970633864,1.2382791340351105,-0.3171096555888653,0.9383813589811325,-1.2636808641254902,1.0173301994800568,0.019001231528818607,-1.3249326683580875,0.5870154346339405,0.8698606602847576,-2.599312389269471,-1.5171150006353855,-0.6266827266663313,-1.5915501937270164,-1.8760247696191072,1.5040200501680374,1.2622322514653206,-0.30223866552114487,-2.971720688045025,-0.6495114527642727,1.4339256789535284,-1.6492272093892097,-0.07635836116969585,2.1090922243893147,-1.7355042286217213,1.374142061918974,-1.0391269214451313,0.009142358787357807]         
|[-1.3656953491736203,2.262380998581648,-0.1461292915046215,1.969050515908748,-0.1737185763195157,-1.9211842939257622,-1.1948372460901737,2.052053041756153,5.114864304661751,1.4535717070102692,1.912816371768713,-1.8731888644397259,2.017546884715557,0.10656195506453514,0.5758709786459804,-2.5464148819446564,1.1840807721018791,2.3689589351415634,-1.420125757344067,0.607261598110199,0.7797040343284607,-0.28604825492948294,-1.120248343795538,0.3765058405697346,-0.6957631884142756,-0.6895492132753134,1.280192032456398,1.5281146056950092,0.7685442945221439,-1.7754551768302917,-1.9240268170833588,2.4303556382656097,0.9964980389922857,3.212188944220543,1.0133401975035667,-0.4970829728990793,-1.9212884716689587,1.6305637955665588,1.609117865562439,-1.0815647169947624,-0.7190599404275417,-1.4993143677711487,0.5630251839756966,-2.513417422771454,-0.20005721191409975,-1.780095398426056,1.1745162680745125,-0.17344455514103174,3.2183327823877335,-0.3815215271897614]                  |[-0.920694176107645,-0.6884733289480209,0.31896160915493965,-1.372283961623907,-0.4847866874188185,-1.154514449648559,0.9442515475675464,0.7275347355753183,0.5013213194906712,0.323440283536911,-0.24745767191052437,-1.631956558674574,0.29878798546269536,-0.4759113844484091,0.6609521172940731,0.020364935509860516,0.6519736160989851,-0.45746519044041634,-0.3921414166688919,1.7272226475179195,0.34865038096904755,-1.581941345706582,-0.16742620524019003,-0.1956513747572899,-0.6220873631536961,1.2586500672623515,0.9750607963651419,-0.3616542648524046,0.6748571284115314,-2.4976582676172256,-1.7809041142463684,-0.8222400080412626,0.010203697718679905,1.186923272907734,-0.9469131901860237,0.9032065235078335,1.2501162998378277,1.072108183056116,0.362057214602828,0.1391479237936437,0.3345924010500312,1.2501245588064194,0.5012695649638772,-0.00963654275983572,-2.106096714735031,0.2566782934591174,-0.1739109270274639,0.6215358087792993,1.4774949960410595,-0.17169226240366697]             
|[-0.059281557255114116,1.2433078475296497,-0.9801841868708531,0.3843088415451348,0.02398868432889382,-0.8169517191126943,0.36002798719952506,0.9226327773649246,2.766498780498902,0.8278449319768697,0.8055565903584162,-1.4567863332728546,0.7924069937629005,-0.25779906334355474,0.4597251983359456,-1.5094362137218316,0.45366212252217036,0.6342100331482167,-0.3867368067925175,0.5477429876724879,1.2574943999449413,-0.5458196139273545,-0.5075856895806888,0.4730445332825184,-0.5449867357189456,0.5024940709893903,0.33052398823201656,0.7279301801075538,0.487467551487498,-1.8660153709352016,-1.0393051655652623,0.8259920968363682,-0.5308702175195018,0.9606657388309637,-0.18675190644959608,-0.3951422143727541,-0.8490656471500794,1.4022306762635708,1.0778024438768625,-0.4148851529074212,-1.118729409140845,-0.2995670872430007,0.832740142631034,-1.3907603916401665,-0.7941707626062756,0.19522503980745873,-0.24496629585822424,0.6074111051857471,1.2189002856612206,-0.1813571436020235]                        |
|1B8D.K          |SAYIAAFTFTRDRLCVPRDMSSQAG|H      |H      |[SAY, IAA, FTF, TRD, RLC, VPR, DMS, SQA]|[AYI, AAF, TFT, RDR, LCV, PRD, MSS, QAG]|[YIA, AFT, FTR, DRL, CVP, RDM, SSQ, AG]|[-0.2355625682976097,0.7051106672734022,-1.920936569571495,0.34762217476963997,0.11081899795681238,-0.551957453135401,2.7813909351825714,-1.1944835856556892,1.6380264442414045,0.9252552725374699,1.1597816217690706,-4.059203177690506,0.17605009209364653,3.403637558221817,-2.920738935470581,-0.2167189121246338,-1.7344603165984154,-0.9959599375724792,0.6662982478737831,2.14886412397027,1.855818472802639,-3.533353716135025,0.10422096401453018,5.229652583599091,0.6159852724522352,-0.32319993712008,0.2155357263982296,0.16223841160535812,-1.680720940232277,3.082412511110306,1.1684160977602005,1.2927870452404022,-1.644936925265938,-2.6714674830436707,0.6972438434604555,-0.6396772228181362,-2.4566360861063004,-1.1146053820848465,-1.407238227315247,-1.0547863505780697,-1.5330027667805552,2.3846621364355087,-0.44634881196543574,-1.4874711632728577,-2.4342856854200363,2.1368952095508575,-1.1957266991958022,1.3552419636398554,-4.070890933275223,0.25026608165353537]                            
|[1.7952079474925995,0.06272168457508087,-2.583307296037674,0.46255958639085293,3.013937771320343,1.8696111589670181,4.280999153852463,-3.302396595478058,2.288519434630871,0.1654946282505989,-1.1078321728855371,-2.1531009823083878,1.9193978235125542,-1.176563061773777,1.7840423285961151,-1.0152955185621977,1.336071953177452,0.9866439397446811,0.4427692727185786,1.0244233813136816,1.5057770144194365,-0.06615107133984566,2.6193689480423927,1.8782661706209183,-1.1201509684324265,0.7212670501321554,1.9458320438861847,1.1799133643507957,-3.398435205221176,-1.8867838308215141,0.7030458562076092,3.7676814198493958,-2.3247402608394623,-3.0528917387127876,1.9107196889817715,-0.7959130704402924,-1.1992070693522692,0.34569686371833086,-1.4758710637688637,1.3017341941595078,-1.7336471676826477,3.034512013196945,1.3709502145648003,-1.9579762630164623,0.11206316575407982,-0.36597617343068123,-0.8419242799282074,0.32283943658694625,1.1829194389283657,-1.662440948188305]               |[0.07700180681422353,1.658301018178463,-1.8273828625679016,-1.5721308216452599,2.538229327648878,-0.0951214109081775,3.7674053013324738,-0.48042474314570427,-0.3330909386277199,-1.855537161231041,1.6112911030650139,-3.0677910447120667,-0.10953499190509319,1.4308972544968128,-1.949615977704525,0.7341262167319655,0.29329523257911205,0.3908341694623232,0.3003198131918907,1.2219286682084203,1.2905295118689537,-0.8265717756003141,2.368467539548874,3.572900179773569,0.38344161212444305,1.1912685185670853,2.168658010661602,-0.10165900504216552,-2.783925984054804,1.581010242458433,1.0708794556558132,1.4173080623149872,0.5466471500694752,-3.783757507801056,1.0850983271375299,-0.38153889030218124,0.5319278389215469,1.009333774447441,-1.5684102550148964,-0.14288812782615423,0.018477976322174072,2.1293507888913155,-0.1228507049381733,-2.76300960034132,-3.680723048746586,0.09269606694579124,-0.054558479227125645,0.23160417564213276,-2.6863569617271423,0.03331323037855327]                
|[0.5455490620030711,0.8087111233423153,-2.1105422427256904,-0.25398302016158897,1.887662032308678,0.40751076497447986,3.6099317967891693,-1.6591016414264839,1.197818313414852,-0.2549290868143241,0.5544135173161825,-3.0933650682369866,0.6619709745670358,1.2193239169816177,-1.0287708615263302,-0.1659627379849553,-0.03503104361395041,0.12717272387817502,0.46979577792808414,1.4650720578307908,1.550708333030343,-1.4753588543583949,1.697352483868599,3.5602729779978595,-0.04024136128524939,0.5297785438597202,1.4433419269820054,0.41349759030466277,-2.6210273765027523,0.9255463075824082,0.9807804698745409,2.1592588424682617,-1.141010012011975,-3.169372243185838,1.231020619859919,-0.6057097278535366,-1.0413051055123408,0.08014175202697515,-1.4838398486996691,0.034686571918427944,-1.0827239860470097,2.516174979507923,0.2672502325537304,-2.0694856755435467,-2.000981856137514,0.6212050343553225,-0.6974031527837118,0.6365618586229781,-1.8581094853579998,-0.4596205453854054]                              |
|1A9X.F          |TTAEAYSWTQGSWTLTGGLPQAKKE|C      |C      |[TTA, EAY, SWT, QGS, WTL, TGG, LPQ, AKK]|[TAE, AYS, WTQ, GSW, TLT, GGL, PQA, KKE]|[AEA, YSW, TQG, SWT, LTG, GLP, QAK, KE]|[-0.5131183154881,-1.3824960142374039,0.25376382656395435,1.8815849963575602,0.42456314340233803,-0.43922753259539604,1.2180235683918,1.2661815471947193,-0.5956995207816362,-0.1232803724706173,1.7507944721728563,-0.20300733763724566,-2.3613752722740173,1.4926847517490387,0.3393649556674063,2.0707089379429817,-0.08713245019316673,1.89300736784935,-1.1928603611886501,0.6479572877287865,0.9754217928275466,-0.3588095773011446,2.5493194460868835,-0.18323247134685516,1.7533278055489063,-1.168042761972174,-1.2043057158589363,-0.3860053368844092,-0.6184094622731209,1.6831379234790802,2.4432255029678345,-0.767640758305788,1.7791106887161732,-1.646114026196301,0.43171804025769234,0.7264275290071964,0.6890315171331167,-1.0663194954395294,-1.7342959716916084,-0.5108097456395626,0.27289138548076153,1.1682550245895982,-0.343986937077716,-0.8825663179159164,-0.011732869781553745,-2.031363781541586,1.8395745791494846,-0.6085375808179379,-0.9304177314043045,0.1347821895033121]                    
|[-0.7859331574290991,-2.3178150579333305,2.604906350374222,0.4165639542043209,-1.3845726698637009,-1.4343773126602173,-2.477336436510086,2.9008269011974335,1.753437228500843,-0.029327647760510445,-0.15149745624512434,3.8913730084896088,-0.7852945374324918,-0.11634675785899162,0.8308861274272203,0.7137060821987689,-1.499210886657238,-1.545018807053566,-1.4133384600281715,0.0730559192597866,-1.0774972923099995,0.6488555260002613,-3.522120237350464,-3.4260272532701492,0.6090141460299492,-0.5853617683751509,-0.9274829430505633,0.38358839554712176,2.888038769364357,-2.437918573617935,-0.6139942742884159,-1.2140733003616333,0.500672722235322,1.8626229986548424,-0.4823124052491039,1.2186763249337673,1.762550763785839,-2.023864395916462,2.329172298312187,-1.1862886529415846,1.343526430428028,-4.103940069675446,-0.500224462710321,1.1927498914301395,0.23489337041974068,-1.346204500645399,-1.3368341103196144,1.1174151860177517,-0.77524034678936,-0.6064285258762538]               |[-0.1625298084691167,0.3564658295363188,-0.8471960946917534,1.6348321475088596,0.9846962690353394,0.8039786303415895,2.2493163347244263,1.2427338063716888,0.5671191271394491,-2.054475888609886,0.2734578214585781,-0.9604621082544327,-1.1355220954865217,-1.0272682765498757,0.6895532021299005,1.5934484079480171,1.330396220088005,-1.4085962027311325,0.11269399896264076,-1.6955770291388035,0.434757417999208,-0.6648368537425995,-0.01123660709708929,-0.6115081645548344,-0.8189486861228943,-0.724785141646862,-0.5027765785343945,0.5899669583886862,1.2803489565849304,-0.7646423950791359,-1.2542270980775356,-0.2795898374170065,1.0665595941245556,0.09806957561522722,-0.8601688593626022,-2.9216733425855637,-1.7174906432628632,-0.7034023143351078,0.5399208217859268,0.8860075559932739,0.1779284905642271,-0.20272281765937805,0.3811299642547965,-0.37481122743338346,0.18879294581711292,0.2425433285534382,1.2484535202383995,-1.1580745540559292,0.8208122849464417,-1.2014817260205746]           
|[-0.4871937604621053,-1.1146150808781385,0.6704913607488076,1.3109936993569136,0.008228914191325506,-0.35654207163800794,0.33000115553538006,1.8032474182546139,0.5749522782862186,-0.7356946362803379,0.6242516124621034,0.9093011875326434,-1.4273973017310102,0.11635657244672377,0.6199347617415091,1.459287809363256,-0.08531570558746655,-0.35353588064511615,-0.8311682740847269,-0.32485460738341015,0.11089397283891837,-0.12493030168116093,-0.3280124661202232,-1.4069226297239463,0.5144644218186537,-0.8260632239980623,-0.8781884124812981,0.1958500056837996,1.1833260878920555,-0.5064743484059969,0.19166804353396097,-0.753767965361476,1.1154476683586836,0.10485951602458954,-0.30358774145133793,-0.3255231628815333,0.2446972125520309,-1.2645287352303665,0.37826571613550186,-0.2703636141959578,0.5981154354910055,-1.0461359542484086,-0.15436047851108015,-0.021542551306386788,0.13731781548509994,-1.0450083178778489,0.5837313296894232,-0.21639898295203844,-0.2949485977490743,-0.5577093541311721]         |
|1FPO.B          |ADTCRKLRFLDKLRSSAEQLEEKLL|H      |H      |[ADT, CRK, LRF, LDK, LRS, SAE, QLE, EKL]|[DTC, RKL, RFL, DKL, RSS, AEQ, LEE, KLL]|[TCR, KLR, FLD, KLR, SSA, EQL, EEK, LL]|[0.5554861556738615,-1.1574508603662252,1.1349009722471237,-0.6976140160113573,-0.35459863394498825,-0.2653488591313362,0.003453163430094719,-0.7754270508885384,-0.7707704603672028,-1.281157549470663,-0.8733213841915131,0.9339175866916776,-0.7194259241223335,-0.5865279212594032,-0.25343677029013634,0.20408974960446358,0.5894439606927335,-0.8636046499013901,1.2940451130270958,-0.7757947873324156,-0.4149734042584896,1.3031713915988803,0.1912793042138219,-0.9648358598351479,-0.9830865953117609,-1.1513532623648643,-0.7732661440968513,0.23110825940966606,0.5594087168574333,0.3629984799772501,-0.0012647658586502075,-0.10625619068741798,-0.3454243806190789,-2.422784134745598,-0.10493384301662445,-0.5304899848997593,1.615607038140297,-1.3307701363228261,-0.6895696576684713,0.28803718369454145,0.6838466823101044,0.4565105917863548,-0.6299549448303878,-0.2291095145046711,-0.20115602714940906,-1.3117680624127388,-0.8496581390500069,0.22730981558561325,-0.6977918315678835,-0.782618947327137]|[0.33273782185278833,-2.0636640461161733,1.2148131746798754,-0.6491845697164536,-1.2683077156543732,-0.8156023994088173,0.32503095734864473,-0.5036848932504654,-0.4854583591222763,-0.22278815507888794,0.14800740033388138,1.0071449726819992,-0.9827362969517708,0.974461100064218,-1.5294614136219025,-1.0604724362492561,-1.070550442673266,0.9985935399308801,1.0819804351776838,-0.5734447930008173,0.7803174592554569,1.495366906747222,0.24593698419630527,1.9129843264818192,0.6537598697468638,0.0361276688054204,-0.2984302891418338,0.6022757329046726,-1.1092976480722427,0.6917073763906956,0.9910483458079398,-1.1088784458115697,-0.13571936823427677,-3.1070220917463303,0.6418740095105022,0.9076450727880001,1.712648332118988,-0.8606112599372864,0.762262518517673,0.3621760122478008,0.06111794523894787,-0.332454614341259,-1.0
830181566998363,-0.3689580513164401,-0.17366084037348628,0.6131803886964917,-1.9756878465414047,0.44074620865285397,-3.5046023428440094,-0.19075225829146802]   |[0.20934702455997467,-0.699911005795002,-0.9386138319969177,-1.6223662123084068,-1.7913143038749695,0.2121964432299137,-0.03111310675740242,-1.5630945153534412,-1.9531667530536652,-1.7501071989536285,-2.30035338178277,1.7419663332402706,-0.7412234041839838,0.06878069043159485,-0.7093469835817814,0.23938211798667908,0.8330525942146778,-0.8675770834088326,1.283548928797245,-1.337278138846159,0.884598046541214,0.7677742578089237,2.0800666958093643,0.5611337572336197,-1.156651847064495,0.6551853073760867,-0.42920611798763275,-0.8593218009918928,-0.30414705723524094,-0.32126195169985294,0.5191336572170258,-0.6314235627651215,-0.3233811715617776,-1.5830085389316082,1.0428319377824664,-1.200994398444891,1.4418189227581024,0.2379303313791752,-0.5069212429225445,-0.021101519465446472,1.447510770522058,-1.051062848418951,-0.271819282323122,-0.1071696225553751,0.6488500479608774,0.4563357047736645,-0.9797017723321915,-0.388039780780673,-3.047518290579319,-0.6076468173414469]           
|[0.3658570006955415,-1.3070086374258,0.47036677164336044,-0.9897215993454059,-1.1380735511581104,-0.2895849384367466,0.09912367134044568,-0.9474021531641483,-1.0697985241810481,-1.0846843011677265,-1.0085557885468006,1.2276762975379825,-0.8144618750860294,0.15223795641213655,-0.8307483891646067,-0.20566685621937117,0.11731537074471514,-0.24419606445978084,1.219858159000675,-0.8955059063931307,0.41664736717939377,1.1887708520516753,0.8390943280731639,0.5030940746267637,-0.49532619087646407,-0.15334676206111908,-0.5003008504087726,-0.008645936225851377,-0.28467866281668347,0.2444813015560309,0.5029724123887718,-0.615519399754703,-0.2681749734717111,-2.3709382551411786,0.5265907014254481,-0.2746131035188834,1.5900247643391292,-0.6511503549603125,-0.1447427940244476,0.20970389215896526,0.7308251326903701,-0.3090022903246184,-0.6615974612844487,-0.2350790627921621,0.09134439347932737,-0.0807506563141942,-1.268349252641201,0.09333874781926473,-2.416637488330404,-0.5270060076533506]              |
+----------------+-------------------------+-------+-------+----------------------------------------+----------------------------------------+---------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------+
only showing top 10 rows

In [8]:
# Convert the encoded Spark dataset into a pandas DataFrame so it renders as a
# notebook table. NOTE: toPandas() loads every row into the driver's memory;
# this is acceptable here because the input was down-sampled earlier.
df = data.toPandas()

# Display the first 10 rows (last expression in the cell, so it is rendered).
df.head(10)
Out[8]:
structureChainId sequence labelQ8 labelQ3 ngram0 ngram1 ngram2 feature0 feature1 feature2 features
0 1B8D.K ACFAKYSYLKNAGEAGDSPEKINKC T C [ACF, AKY, SYL, KNA, GEA, GDS, PEK, INK] [CFA, KYS, YLK, NAG, EAG, DSP, EKI, NKC] [FAK, YSY, LKN, AGE, AGD, SPE, KIN, KC] [-2.5167602449655533, 0.41110057570040226, -0.... [-0.9313944121822715, 0.8716795854270458, -2.8... [-0.3790398994460702, 2.511866755783558, -3.56... [-1.2757315188646317, 1.264882305637002, -2.46...
1 1AOL.A CNTAWNRLKLDQVTHKSSEGFYVCP H H [CNT, AWN, RLK, LDQ, VTH, KSS, EGF, YVC] [NTA, WNR, LKL, DQV, THK, SSE, GFY, VCP] [TAW, NRL, KLD, QVT, HKS, SEG, FYV, CP] [-0.8749945722520351, -2.3850620687007904, -0.... [-1.5751862302422523, 2.3965483605861664, -1.9... [-0.9588506408035755, -0.6263859607279301, 3.7... [-1.136343814432621, -0.20496655628085136, 0.2...
2 1FO8.A DPSLWCVSAWNDNGKEQMVDSSKPE T C [DPS, LWC, VSA, WND, NGK, EQM, VDS, SKP] [PSL, WCV, SAW, NDN, GKE, QMV, DSS, KPE] [SLW, CVS, AWN, DNG, KEQ, MVD, SSK, PE] [0.11070175841450691, -1.486862190067768, -1.5... [1.1518815904855728, -0.13194116204977036, 3.5... [-0.0534269493073225, -1.546366237103939, -0.8... [0.40305213319758576, -1.0550565297404926, 0.3...
3 1FSG.C GCCYDFNEMFRDFDHVAVLSDAARK C C [GCC, YDF, NEM, FRD, FDH, VAV, LSD, AAR] [CCY, DFN, EMF, RDF, DHV, AVL, SDA, ARK] [CYD, FNE, MFR, DFD, HVA, VLS, DAA, RK] [2.2771085798740387, -1.5183069882914424, -0.5... [2.024966076016426, 0.7575673609972, -1.057440... [1.3174011707305908, -0.8285085475072265, -0.4... [1.8731586088736851, -0.5297493916004896, -0.6...
4 1FSG.C IDKILLPGGLVKDRVEKLAYDIHRT H H [IDK, ILL, PGG, LVK, DRV, EKL, AYD, IHR] [DKI, LLP, GGL, VKD, RVE, KLA, YDI, HRT] [KIL, LPG, GLV, KDR, VEK, LAY, DIH, RT] [0.0011718850582838058, -0.8450929443351924, 2... [0.6603811718523502, -1.688733596354723, 0.210... [-0.14117270801216364, 0.501228928565979, -0.4... [0.17346011629949012, -0.6775325373746455, 0.6...
5 1C1K.A KAYRKILNIDSQKAKNVFIETVKSC H H [KAY, RKI, LNI, DSQ, KAK, NVF, IET, VKS] [AYR, KIL, NID, SQK, AKN, VFI, ETV, KSC] [YRK, ILN, IDS, QKA, KNV, FIE, TVK, SC] [0.8315972853451967, 0.043749348260462284, 0.9... [-0.14495163690298796, -1.5973218269646168, 1.... [-0.09335422236472368, -2.975793592631817, 0.4... [0.19776380869249502, -1.5097886904453237, 0.8...
6 1A9X.F LSSYLKRHNIVAIADIDTRKLTRLL E E [LSS, YLK, RHN, IVA, IAD, IDT, RKL, TRL] [SSY, LKR, HNI, VAI, ADI, DTR, KLT, RLL] [SYL, KRH, NIV, AIA, DID, TRK, LTR, LL] [2.108544853515923, 2.1560158729553223, -3.113... [-1.3656953491736203, 2.262380998581648, -0.14... [-0.920694176107645, -0.6884733289480209, 0.31... [-0.059281557255114116, 1.2433078475296497, -0...
7 1B8D.K SAYIAAFTFTRDRLCVPRDMSSQAG H H [SAY, IAA, FTF, TRD, RLC, VPR, DMS, SQA] [AYI, AAF, TFT, RDR, LCV, PRD, MSS, QAG] [YIA, AFT, FTR, DRL, CVP, RDM, SSQ, AG] [-0.2355625682976097, 0.7051106672734022, -1.9... [1.7952079474925995, 0.06272168457508087, -2.5... [0.07700180681422353, 1.658301018178463, -1.82... [0.5455490620030711, 0.8087111233423153, -2.11...
8 1A9X.F TTAEAYSWTQGSWTLTGGLPQAKKE C C [TTA, EAY, SWT, QGS, WTL, TGG, LPQ, AKK] [TAE, AYS, WTQ, GSW, TLT, GGL, PQA, KKE] [AEA, YSW, TQG, SWT, LTG, GLP, QAK, KE] [-0.5131183154881, -1.3824960142374039, 0.2537... [-0.7859331574290991, -2.3178150579333305, 2.6... [-0.1625298084691167, 0.3564658295363188, -0.8... [-0.4871937604621053, -1.1146150808781385, 0.6...
9 1FPO.B ADTCRKLRFLDKLRSSAEQLEEKLL H H [ADT, CRK, LRF, LDK, LRS, SAE, QLE, EKL] [DTC, RKL, RFL, DKL, RSS, AEQ, LEE, KLL] [TCR, KLR, FLD, KLR, SSA, EQL, EEK, LL] [0.5554861556738615, -1.1574508603662252, 1.13... [0.33273782185278833, -2.0636640461161733, 1.2... [0.20934702455997467, -0.699911005795002, -0.9... [0.3658570006955415, -1.3070086374258, 0.47036...

Terminate Spark Context

In [9]:
# Shut down the SparkContext created at the start of the notebook.
sc.stop()