Source code for mmtfPyspark.utils.columnarStructureX

#!/usr/bin/env python
'''columnarStructureX.py

Defines ColumnarStructureX, a subclass of ColumnarStructure that adds
normalized B-factor and C-alpha (index / coordinate) accessors.

'''
# Module metadata (authorship, version and release status).
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"

import numpy as np
import sys
from mmtfPyspark.utils import ColumnarStructure
from sympy import Point3D


class ColumnarStructureX(ColumnarStructure):
    '''ColumnarStructure extended with B-factor normalization and C-alpha helpers.

    Attributes
    ----------
    structure : mmtfStructure
        mmtf structure
    firstModelOnly : bool
        flag to use only the first model of the structure
    normalizedbFactors : list of float or None
        cached result of get_normalized_b_factors (None until first call)
    clampedNormalizedbFactor : numpy.ndarray or None
        cached result of get_clamped_normalized_b_factors (None until first
        call)
    '''

    # z critical value bounding the 90% confidence interval (tail area 0.05);
    # used to scale normalized B-factors onto [-1, 1].
    _Z_CRITICAL_90 = 1.645

    def __init__(self, structure, firstModelOnly=True):
        super().__init__(structure, firstModelOnly)
        # Lazily computed caches; filled on first use by the getters below.
        self.normalizedbFactors = None
        self.clampedNormalizedbFactor = None

    def get_normalized_b_factors(self):
        '''Returns z-scores for B-factors (normalized B-factors).

        The mean and standard deviation are computed over non-water atoms
        only (entity type other than 'WAT'), but every atom, including
        water, receives a z-score. The result is cached on the instance.

        Critical z-score values:

            Confidence level    Tail Area    z critical
                 90%              0.05       +- 1.645
                 95%              0.025      +- 1.96
                 99%              0.005      +- 2.576

        Returns
        -------
        list of float
            One z-score per B-factor. If the non-water B-factors have zero
            standard deviation, or there are no non-water atoms at all,
            every entry is sys.float_info.max.
        '''
        if self.normalizedbFactors is None:
            # The original code stores the getter results on these
            # attributes; kept so callers reading them see the same state.
            self.bFactors = self.get_b_factors()
            self.entityTypes = self.get_entity_types()
            num_atoms = self.get_num_atoms()

            b_factors = np.asarray(self.bFactors)
            entity_types = np.asarray(self.entityTypes)

            # Filter out DOD and HOH. Compare by value: an identity test
            # ("is not") against a string literal does not reliably match.
            not_water = entity_types[:num_atoms] != 'WAT'
            stats = b_factors[:num_atoms][not_water]

            # An empty selection has no meaningful spread; treat it like the
            # zero-variance case instead of propagating NaN.
            std = stats.std() if stats.size else 0.0

            if std != 0:
                normalized = (b_factors - stats.mean()) / std
                self.normalizedbFactors = normalized.tolist()
            else:
                self.normalizedbFactors = \
                    [sys.float_info.max] * len(b_factors)

        return self.normalizedbFactors

    def get_clamped_normalized_b_factors(self):
        '''Returns normalized B-factors clamped to the [-1, 1] interval
        using the method of Liu et al.

        B-factors are normalized, then scaled so that the 90% confidence
        interval maps onto [-1, 1]. Any value outside of the 90% confidence
        interval is set to either -1 or 1, whichever is closer. The result
        is cached on the instance.

        Returns
        -------
        numpy.ndarray
            Float array with one clamped value per B-factor.

        References
        ----------
        - Liu et al. BMC Bioinformatics 2014, 15(Suppl 16):S3,
          Use B-factor related features for accurate classification between
          protein binding interfaces and crystal packing contacts
          https://doi.org/10.1186/1471-2105-15-S16-S3
        '''
        if self.clampedNormalizedbFactor is None:
            # get_normalized_b_factors returns a plain list, which supports
            # neither division nor boolean-mask assignment; convert to a new
            # ndarray first (this also leaves the cached list untouched).
            z_scores = np.array(self.get_normalized_b_factors(), dtype=float)

            # Normalize and scale the 90% confidence interval to [-1, 1],
            # then set any value outside the interval to -1 or 1.
            self.clampedNormalizedbFactor = np.clip(
                z_scores / self._Z_CRITICAL_90, -1.0, 1.0)

        return self.clampedNormalizedbFactor

    def get_calpha_coordinates(self):
        '''Get the coordinates for Calpha atoms

        Returns
        -------
        numpy.ndarray
            Array of shape (n_calpha, 3) holding the x, y, z coordinates of
            each C-alpha atom, in the order given by
            get_calpha_atom_indices. Also stored on self.calpha_coords.
        '''
        ca_indices = self.get_calpha_atom_indices()

        x = np.asarray(self.get_x_coords())
        y = np.asarray(self.get_y_coords())
        z = np.asarray(self.get_z_coords())

        # Select all C-alpha rows at once rather than building one small
        # array per atom.
        self.calpha_coords = np.column_stack(
            (x[ca_indices], y[ca_indices], z[ca_indices]))

        return self.calpha_coords

    def get_calpha_atom_indices(self):
        '''Get the indices of Calpha atoms

        An atom is selected when its atom name is "CA" and its entity type
        is "PRO".

        Returns
        -------
        numpy.ndarray
            Integer array of atom indices in ascending order (empty when
            nothing matches). Also stored on self.caIndices.
        '''
        # The original code calls both getters and then reads the
        # attributes, i.e. it relies on the calls populating
        # self.entityTypes / self.atomNames; that order is kept.
        self.get_entity_types()
        self.get_atom_names()
        num_atoms = self.get_num_atoms()

        atom_names = np.asarray(self.atomNames)[:num_atoms]
        entity_types = np.asarray(self.entityTypes)[:num_atoms]

        # flatnonzero always yields an integer index array, even when there
        # are no matches.
        self.caIndices = np.flatnonzero(
            (atom_names == "CA") & (entity_types == "PRO"))

        return self.caIndices