Source code for mmtfPyspark.utils.columnarStructureX
#!/usr/bin/env python
'''columnarStructureX.py
Subclass of ColumnarStructure that adds B-factor normalization and C-alpha helpers
'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"
import numpy as np
import sys
from mmtfPyspark.utils import ColumnarStructure
from sympy import Point3D
class ColumnarStructureX(ColumnarStructure):
    '''ColumnarStructure extended with B-factor normalization and C-alpha helpers.

    Attributes
    ----------
    structure : mmtfStructure
        mmtf structure
    firstModelOnly : bool
        flag to use only the first model of the structure
    '''

    def __init__(self, structure, firstModelOnly=True):
        super().__init__(structure, firstModelOnly)
        # Lazily computed caches; each is filled on the first call of its getter.
        self.normalizedbFactors = None
        self.clampedNormalizedbFactor = None

    def get_normalized_b_factors(self):
        '''Returns z-scores for B-factors (normalized B-factors).

        The mean and standard deviation are computed over non-water atoms
        only, but every B-factor (water included) is normalized with them.
        The result is cached on ``self.normalizedbFactors``.

        Critical z-score values:

            Confidence level   Tail Area   z critical
            90%                0.05        +- 1.645
            95%                0.025       +- 1.96
            99%                0.005       +- 2.576

        Returns
        -------
        list of float
            One z-score per B-factor. When the non-water B-factors have zero
            standard deviation (or there are no non-water atoms at all), every
            entry is ``sys.float_info.max``.
        '''
        if self.normalizedbFactors is None:
            self.bFactors = self.get_b_factors()
            self.entityTypes = self.get_entity_types()

            # Exclude water atoms (entity type 'WAT', e.g. HOH and DOD) from
            # the statistics. Strings must be compared with !=; an identity
            # test ("is not") does not reliably detect equal strings.
            non_water = np.array([self.bFactors[i]
                                  for i in range(self.get_num_atoms())
                                  if self.entityTypes[i] != 'WAT'])

            # An empty selection has no meaningful spread; treat it like the
            # zero-variance case instead of producing NaN and runtime warnings.
            std = non_water.std() if non_water.size else 0.0

            if std != 0:
                mean = non_water.mean()
                z_scores = (np.asarray(self.bFactors) - mean) / std
                self.normalizedbFactors = [float(z) for z in z_scores]
            else:
                self.normalizedbFactors = \
                    [sys.float_info.max] * len(self.bFactors)

        return self.normalizedbFactors

    def get_clamped_normalized_b_factors(self):
        '''Returns normalized B-factors that are clamped to the [-1,1] interval
        using the method of Liu et al. B-factors are normalized and the
        90% confidence interval of the B-factors is scaled to [-1,1]. Any value
        outside of the 90% confidence interval is set to either -1 or 1,
        whichever is closer. The result is cached on
        ``self.clampedNormalizedbFactor``.

        Returns
        -------
        numpy.ndarray
            One clamped value per B-factor.

        References
        ----------
        - Liu et al. BMC Bioinformatics 2014, 15(Suppl 16):S3,
          Use B-factor related features for accurate classification between
          protein binding interfaces and crystal packing contacts
          https://doi.org/10.1186/1471-2105-15-S16-S3
        '''
        if self.clampedNormalizedbFactor is None:
            # The normalized B-factors are a plain list, so convert to an
            # array before doing arithmetic on them.
            z_scores = np.asarray(self.get_normalized_b_factors(), dtype=float)

            # Dividing by 1.645 (z critical for 90% confidence) maps the 90%
            # confidence interval onto [-1, 1]; clip pins everything outside
            # that interval to the nearer bound.
            self.clampedNormalizedbFactor = np.clip(z_scores / 1.645,
                                                    -1.0, 1.0)

        return self.clampedNormalizedbFactor

    def get_calpha_coordinates(self):
        '''Get the coordinates for Calpha atoms

        The result is also stored on ``self.calpha_coords``.

        Returns
        -------
        numpy.ndarray
            Array of shape (number of Calpha atoms, 3) holding the x, y and z
            coordinate of each Calpha atom.
        '''
        ca_indices = self.get_calpha_atom_indices()
        x = self.get_x_coords()
        y = self.get_y_coords()
        z = self.get_z_coords()

        # NOTE: plain numpy rows are used on purpose; sympy's Point3D was
        # found to be extremely slow here, only use it if necessary.
        # reshape keeps the (n, 3) layout even when there are no Calpha atoms.
        self.calpha_coords = np.array(
            [[x[i], y[i], z[i]] for i in ca_indices]).reshape(-1, 3)

        return self.calpha_coords

    def get_calpha_atom_indices(self):
        '''Get the indices of Calpha atoms

        An atom is selected when its atom name is "CA" and its entity type is
        "PRO". The result is also stored on ``self.caIndices``.

        Returns
        -------
        numpy.ndarray
            Integer array of the selected atom indices.
        '''
        # Both getters are called for their side effect: the selection below
        # reads self.entityTypes and self.atomNames.
        self.get_entity_types()
        self.get_atom_names()

        selected = [i for i in range(self.get_num_atoms())
                    if self.atomNames[i] == "CA"
                    and self.entityTypes[i] == "PRO"]

        # Explicit integer dtype so an empty selection is still a valid
        # index array (np.array([]) would default to float64).
        self.caIndices = np.array(selected, dtype=int)

        return self.caIndices