Source code for mmtfPyspark.filters.secondaryStructure

#!/usr/bin/env python
'''secondaryStructure.py

This filter returns entries that contain polymer chain(s) with the specified
fraction of secondary structure assignments, obtained by DSSP. Note, DSSP
secondary structure in MMTF files is assigned by the BioJava implementation of
DSSP. It may differ in some cases from the original DSSP implementation.

'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"

from mmtfPyspark.utils import DsspSecondaryStructure


class SecondaryStructure(object):
    '''Filter entries by the secondary structure content of their polymer chains.

    The default constructor returns entries that contain at least one polymer
    chain that matches the criteria. If the "exclusive" flag is set to true
    in the constructor, all polymer chains must match the criteria. For a
    multi-model structure, this filter only checks the first model.

    Fractions are computed per polymer chain. Only groups whose
    ``DsspSecondaryStructure.get_q3_code`` result is ALPHA_HELIX, EXTENDED or
    COIL are counted; groups with any other result are left out of the
    denominator. A polymer chain without a single counted group has no
    defined fractions and is treated as not matching.

    Attributes
    ----------
    helixFractionMin : float
        minimum value for helix fraction [default: 0.0]
    helixFractionMax : float
        maximum value for helix fraction [default: 1.0]
    sheetFractionMin : float
        minimum value for sheet fraction [default: 0.0]
    sheetFractionMax : float
        maximum value for sheet fraction [default: 1.0]
    coilFractionMin : float
        minimum value for coil fractions [default: 0.0]
    coilFractionMax : float
        maximum value for coil fractions [default: 1.0]
    exclusive : bool
        exclusive flag [False]
    '''

    def __init__(self, helixFractionMin=0.0, helixFractionMax=1.0,
                 sheetFractionMin=0.0, sheetFractionMax=1.0,
                 coilFractionMin=0.0, coilFractionMax=1.0,
                 exclusive=False):
        self.helixFractionMax = helixFractionMax
        self.helixFractionMin = helixFractionMin
        self.sheetFractionMax = sheetFractionMax
        self.sheetFractionMin = sheetFractionMin
        self.coilFractionMax = coilFractionMax
        self.coilFractionMin = coilFractionMin
        self.exclusive = exclusive

    def __call__(self, t):
        '''Decide whether one record passes the filter.

        Parameters
        ----------
        t : tuple
            record whose element at index 1 is the structure (presumably a
            (structure id, structure) pair -- confirm against the caller).
            The structure is read through its ``chains_per_model``,
            ``groups_per_chain``, ``entity_list`` and ``sec_struct_list``
            attributes.

        Returns
        -------
        bool
            Non-exclusive mode: True as soon as one polymer chain matches.
            Exclusive mode: True only if the first model has at least one
            polymer chain and every polymer chain matches.

        Raises
        ------
        IndexError
            If a chain of the first model is not listed in any entity.
        '''
        structure = t[1]
        groups_per_chain = structure.groups_per_chain
        sec_struct = structure.sec_struct_list
        num_chains = structure.chains_per_model[0]

        contains_polymer = False
        global_match = False
        group_counter = 0

        for i in range(num_chains):
            num_groups = groups_per_chain[i]
            first_group = group_counter
            # sec_struct is indexed by a running group counter, so it must
            # advance past every chain, including the ones that are skipped.
            group_counter += num_groups

            if not self._is_polymer(structure, i):
                continue
            contains_polymer = True

            match = self._chain_matches(sec_struct, first_group, num_groups)
            if match and not self.exclusive:
                return True
            if not match and self.exclusive:
                return False
            if match:
                global_match = True

        return global_match and contains_polymer

    @staticmethod
    def _is_polymer(structure, chain_index):
        '''Return True if the entity listing chain_index has type 'polymer'.'''
        # Indexing [0] keeps the IndexError for a chain no entity refers to.
        chain_type = [entity['type'] for entity in structure.entity_list
                      if chain_index in entity['chainIndexList']][0]
        return chain_type == 'polymer'

    def _chain_matches(self, sec_struct, first_group, num_groups):
        '''Return True if the chain's helix/sheet/coil fractions are in range.

        The chain owns the ``num_groups`` entries of ``sec_struct`` starting
        at index ``first_group``.
        '''
        helix = 0.0
        sheet = 0.0
        coil = 0.0

        for k in range(first_group, first_group + num_groups):
            q3 = DsspSecondaryStructure.get_q3_code(sec_struct[k])
            if q3 == DsspSecondaryStructure.ALPHA_HELIX:
                helix += 1
            elif q3 == DsspSecondaryStructure.EXTENDED:
                sheet += 1
            elif q3 == DsspSecondaryStructure.COIL:
                coil += 1

        # Groups outside the three classes are excluded from the denominator.
        n = helix + sheet + coil
        if n == 0:
            # Empty chain, or no group with a helix/sheet/coil assignment:
            # the fractions are undefined, so the chain cannot match.
            # Dividing here would raise ZeroDivisionError.
            return False

        helix /= n
        sheet /= n
        coil /= n
        return (self.helixFractionMin <= helix <= self.helixFractionMax and
                self.sheetFractionMin <= sheet <= self.sheetFractionMax and
                self.coilFractionMin <= coil <= self.coilFractionMax)