Source code for mmtfPyspark.utils.mmtfDecoder

'''mmtfDecoder.py

Provides efficient methods to decode MMTF structures, including NumPy-based
run-length and recursive-index decoders and helpers that convert the
byte-string keys and values of entity and group records to regular strings.
'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "done"

import numpy as np


def run_length_decoder_numpy(in_array):
    """Decode a run-length encoded array.

    The input is a flat sequence of ``(value, run_length)`` pairs laid out as
    ``[v0, n0, v1, n1, ...]``; each value ``vi`` is repeated ``ni`` times in
    the output.

    Parameters
    ----------
    in_array : list or numpy.ndarray
        the run-length encoded input, alternating values and run lengths

    Returns
    -------
    numpy.ndarray
        one-dimensional ``float64`` array with the expanded values; empty
        when the input holds no complete (value, length) pair

    Raises
    ------
    ValueError
        if a run length is negative
    """
    lengths = np.asarray(in_array[1::2])
    # The result is always float64. A trailing value that has no matching
    # run length is ignored.
    values = np.asarray(in_array[0::2], dtype=np.float64)[:lengths.size]
    if lengths.size == 0:
        # Nothing to expand; avoids indexing into an empty array.
        return np.empty(0, dtype=np.float64)
    # np.repeat expands every run in one vectorised call.
    return np.repeat(values, lengths)
def recursive_index_decode(int_array, decode_num=1000):
    """Unpack an array of integers using recursive indexing.

    A running (cumulative) sum of the input is taken and divided by
    ``decode_num``. Positions whose input value equals the 16-bit limits
    (32767 or -32768) are treated as continuation markers: they contribute
    to the running sum but produce no output element.

    Parameters
    ----------
    int_array : list or numpy.ndarray
        the input array of integers
    decode_num : int
        the divisor applied to the running sum [1000]

    Returns
    -------
    numpy.ndarray
        one-dimensional array of the decoded values (floating point, since
        the running sum is divided by ``decode_num``)
    """
    maximum = 32767
    minimum = -32768
    # Coerce first: comparing a plain list with an int yields a single bool,
    # which would index the whole array and add an axis instead of masking.
    int_array = np.asarray(int_array)
    out_arr = np.cumsum(int_array) / decode_num
    keep = (int_array != maximum) & (int_array != minimum)
    return out_arr[keep]
def decode_entity_list(input_data):
    """Decode every entity record in a list.

    Parameters
    ----------
    input_data : list
        the entity records to decode; each one is passed to
        ``convert_entity``

    Returns
    -------
    list
        the converted entities, in input order
    """
    return list(map(convert_entity, input_data))
def decode_group_list(input_data):
    """Decode every group record in a list.

    Parameters
    ----------
    input_data : list
        the group records to decode; each one is passed to
        ``convert_group``

    Returns
    -------
    list
        the converted groups, in input order
    """
    return list(map(convert_group, input_data))
def convert_group(input_group):
    """Convert an individual group from byte strings to regular strings.

    Every key is decoded to ``str``. The values of ``elementList`` and
    ``atomNameList`` are decoded element by element, the values of
    ``chemCompType``, ``groupName`` and ``singleLetterCode`` are decoded as
    single strings, and all other values are copied unchanged. Keys and
    values that are already ``str`` are passed through as they are, so the
    function also accepts a group that has already been decoded.

    Parameters
    ----------
    input_group : dict
        the group record, keyed by field name (``bytes`` or ``str``)

    Returns
    -------
    dict
        a new dict with ``str`` keys and decoded string fields

    Raises
    ------
    UnicodeDecodeError
        if a byte string to decode contains non-ASCII data
    """
    list_fields = ('elementList', 'atomNameList')
    text_fields = ('chemCompType', 'groupName', 'singleLetterCode')

    def to_text(item):
        # Only byte strings need decoding; anything else is left untouched.
        if isinstance(item, (bytes, bytearray)):
            return item.decode('ascii')
        return item

    output_group = {}
    for key in input_group:
        value = input_group[key]
        name = to_text(key)
        if name in list_fields:
            output_group[name] = [to_text(x) for x in value]
        elif name in text_fields:
            output_group[name] = to_text(value)
        else:
            output_group[name] = value
    return output_group
def convert_entity(input_entity):
    """Convert an individual entity from byte strings to regular strings.

    Every key is decoded to ``str``. The values of ``description``, ``type``
    and ``sequence`` are decoded as well; all other values are copied
    unchanged. Keys and values that are already ``str`` are passed through
    as they are, so the function also accepts an entity that has already
    been decoded.

    Parameters
    ----------
    input_entity : dict
        the entity record, keyed by field name (``bytes`` or ``str``)

    Returns
    -------
    dict
        decoded entity with ``str`` keys

    Raises
    ------
    UnicodeDecodeError
        if a byte string to decode contains non-ASCII data
    """
    text_fields = ('description', 'type', 'sequence')

    def to_text(item):
        # Only byte strings need decoding; anything else is left untouched.
        if isinstance(item, (bytes, bytearray)):
            return item.decode('ascii')
        return item

    output_entity = {}
    for key in input_entity:
        value = input_entity[key]
        name = to_text(key)
        output_entity[name] = to_text(value) if name in text_fields else value
    return output_entity