Source code for mmtfPyspark.utils.mmtfStructure
#!/usr/bin/env python
'''mmtfStructure.py
Decode msgpack-unpacked data into an MMTF structure
'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"
import numpy as np
import time
import struct
from mmtf.utils import decoder_utils
from mmtfPyspark.utils import mmtfDecoder
class MmtfStructure(object):
    """Structure decoded from a msgpack-unpacked MMTF record.

    The constructor copies plain fields from ``input_data`` onto
    snake_case attributes and decodes the binary fields with numpy and
    ``mmtfDecoder``.  For every binary field the first 12 bytes are
    skipped before the payload is read; for the recursive-index encoded
    fields, bytes 8-12 are additionally read as a big-endian int and
    handed to ``mmtfDecoder.recursive_index_decode`` as its second
    argument (presumably the MMTF codec header/parameter -- see the MMTF
    specification to confirm).
    """

    model_counter = 0
    chain_counter = 0
    group_counter = 0
    atom_counter = 0

    @staticmethod
    def _recursive_index(field):
        """Decode ``field`` with ``mmtfDecoder.recursive_index_decode``.

        The payload (bytes 12 onward) is read as big-endian int16 and
        bytes 8-12 as a big-endian int.
        """
        return mmtfDecoder.recursive_index_decode(
            np.frombuffer(field[12:], '>i2'),
            np.frombuffer(field[8:12], '>i'))

    @staticmethod
    def _run_length(field):
        """Decode ``field`` with ``mmtfDecoder.run_length_decoder_numpy``.

        The payload (bytes 12 onward) is read as big-endian int32.
        """
        return mmtfDecoder.run_length_decoder_numpy(
            np.frombuffer(field[12:], '>i4'))

    def __init__(self, input_data):
        """Decodes a msgpack unpacked data to mmtf structure

        Parameters
        ----------
        input_data : dict
           msgpack-unpacked MMTF record keyed by MMTF field name

        Raises
        ------
        KeyError
           if one of the fields read unconditionally below is missing
        """
        # ---- Variables that are not in all mmtf files ----
        self.b_factor_list = (
            self._recursive_index(input_data["bFactorList"])
            if "bFactorList" in input_data else [])

        # Plain pass-through values; a missing key becomes None (or a
        # fresh empty list where the original default was a list).
        self.resolution = input_data.get('resolution')
        self.r_free = input_data.get("rFree")
        self.r_work = input_data.get("rWork")
        self.bio_assembly = input_data.get("bioAssemblyList", [])
        self.unit_cell = input_data.get("unitCell")
        self.release_date = input_data.get("releaseDate")
        self.deposition_date = input_data.get("depositionDate")
        self.title = input_data.get("title")
        self.mmtf_version = input_data.get("mmtfVersion")
        self.mmtf_producer = input_data.get("mmtfProducer")
        self.structure_id = input_data.get("structureId")
        self.space_group = input_data.get("spaceGroup")

        self.bond_atom_list = (
            np.frombuffer(input_data["bondAtomList"][12:], '>i4')
            if "bondAtomList" in input_data else None)
        self.bond_order_list = (
            np.frombuffer(input_data["bondOrderList"][12:], '>i1')
            if "bondOrderList" in input_data else None)
        self.sec_struct_list = (
            np.frombuffer(input_data["secStructList"][12:], '>i1')
            if "secStructList" in input_data else [])

        # The deltas come from 32-bit ('>i4') buffers, so they are cast to
        # int32 before accumulating.  Narrowing them to int16 would
        # silently wrap any delta outside the int16 range and corrupt
        # every subsequent cumulative value.
        self.atom_id_list = (
            np.cumsum(
                self._run_length(input_data['atomIdList']).astype(np.int32))
            if "atomIdList" in input_data else [])
        self.sequence_index_list = (
            np.cumsum(
                self._run_length(
                    input_data['sequenceIndexList']).astype(np.int32))
            if "sequenceIndexList" in input_data else [])

        # Run-length decoded values are divided by 100.
        self.occupancy_list = (
            self._run_length(input_data["occupancyList"]) / 100
            if "occupancyList" in input_data else [])

        self.experimental_methods = input_data.get("experimentalMethods")

        # Each decoded integer is turned into its character via chr().
        self.ins_code_list = (
            [chr(code) for code in
             self._run_length(input_data["insCodeList"]).astype(np.int16)]
            if "insCodeList" in input_data else [])

        self.entity_list = input_data.get("entityList", [])

        # Chain names are read as fixed-width 4-byte strings.
        self.chain_name_list = (
            np.frombuffer(input_data["chainNameList"][12:], 'S4').astype(str)
            if "chainNameList" in input_data else [])

        # ---- Variables guaranteed in mmtf files ----
        self.num_bonds = input_data["numBonds"]
        self.num_chains = input_data["numChains"]
        self.num_models = input_data["numModels"]
        self.num_atoms = input_data["numAtoms"]
        self.num_groups = input_data["numGroups"]
        self.chains_per_model = input_data["chainsPerModel"]
        self.groups_per_chain = input_data["groupsPerChain"]
        self.group_id_list = np.cumsum(
            self._run_length(input_data['groupIdList'])).astype(np.int32)
        self.group_type_list = np.frombuffer(
            input_data['groupTypeList'][12:], '>i4')
        self.x_coord_list = self._recursive_index(input_data['xCoordList'])
        self.y_coord_list = self._recursive_index(input_data['yCoordList'])
        self.z_coord_list = self._recursive_index(input_data['zCoordList'])
        self.group_list = input_data['groupList']
        self.chain_id_list = np.frombuffer(
            input_data["chainIdList"][12:], 'S4').astype(str)

        # Kept as the raw payload bytes here; set_alt_loc_list() decodes
        # it on demand and flips alt_loc_set.
        self.alt_loc_list = input_data['altLocList'][12:]
        self.alt_loc_set = False

    def pass_data_on(self, data_setters):
        """Write the data from the getters to the setters.

        Parameters
        ----------
        data_setters : DataTransferInterface
           object providing ``init_structure`` and ``finalize_structure``;
           it is also passed to the ``decoder_utils`` helpers, which fill
           it from this structure
        """
        # Make sure the alternate-location list is decoded (no-op if it
        # already has been).
        self.set_alt_loc_list()
        data_setters.init_structure(
            self.num_bonds,
            len(self.x_coord_list),
            len(self.group_type_list),
            len(self.chain_id_list),
            len(self.chains_per_model),
            self.structure_id)
        decoder_utils.add_entity_info(self, data_setters)
        decoder_utils.add_atomic_information(self, data_setters)
        decoder_utils.add_header_info(self, data_setters)
        decoder_utils.add_xtalographic_info(self, data_setters)
        decoder_utils.generate_bio_assembly(self, data_setters)
        decoder_utils.add_inter_group_bonds(self, data_setters)
        data_setters.finalize_structure()

    def set_alt_loc_list(self):
        """Set the alternative location list for structure

        Decodes the raw ``alt_loc_list`` bytes into a list of characters
        and sets ``alt_loc_set``.  Calling it again after the list has
        been decoded is a no-op: the decoded list is not a byte buffer,
        so decoding it a second time would fail.

        Returns
        -------
        MmtfStructure
           this instance
        """
        if self.alt_loc_set:
            return self
        self.alt_loc_list = [
            chr(code) for code in mmtfDecoder.run_length_decoder_numpy(
                np.frombuffer(self.alt_loc_list, ">i4")).astype(np.int16)]
        self.alt_loc_set = True
        return self