Source code for mmtfPyspark.filters.containsPolymerChainType

#!/usr/bin/env python
'''containsPolymerChainType.py

This filter returns entries that contain polymer chains made of the specified
monomer types. By default, an entry passes if at least one polymer chain
consists only of the specified monomer types. If the "exclusive" flag is set
to True in the constructor, every polymer chain must match the conditions;
non-polymer chains are ignored. For a multi-model structure, this filter only
checks the first model.

'''
# Module metadata: authorship, contact address, version and release status.
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"


class ContainsPolymerChainType(object):
    '''Filter that keeps entries whose polymer chains consist only of the
    specified monomer types.

    With ``exclusive=False`` (the default) an entry passes as soon as one
    polymer chain in the first model is made up entirely of the specified
    monomer types. With ``exclusive=True`` every polymer chain in the first
    model must match, and at least one polymer chain must be present.
    Non-polymer chains never match and are otherwise ignored.

    Parameters
    ----------
    monomer_type : str or list
        Monomer types allowed in a polymer chain. A string is split on
        commas and each name is stripped of surrounding whitespace. Monomer
        type names that themselves contain commas (the "1,4 AND 1,x LINKING"
        saccharide types) must therefore be passed inside a list.
    exclusive : bool
        If True, all polymer chains must match; defaults to False.

    Attributes
    ----------
    monomer_type : list
        list of monomer types in a polymer chain
    exclusive : bool
        whether all polymer chains have to match
    '''

    D_PEPTIDE_COOH_CARBOXY_TERMINUS = "D-PEPTIDE COOH CARBOXY TERMINUS"
    D_PEPTIDE_NH3_AMINO_TERMINUS = "D-PEPTIDE NH3 AMINO TERMINUS"
    D_PEPTIDE_LINKING = "D-PEPTIDE LINKING"
    D_SACCHARIDE = "D-SACCHARIDE"
    D_SACCHARIDE_14_and_14_LINKING = "D-SACCHARIDE 1,4 AND 1,4 LINKING"
    D_SACCHARIDE_14_and_16_LINKING = "D-SACCHARIDE 1,4 AND 1,6 LINKING"
    # Upper-case aliases, consistent with the L_SACCHARIDE_* names below.
    # The mixed-case names above are kept for backward compatibility.
    D_SACCHARIDE_14_AND_14_LINKING = D_SACCHARIDE_14_and_14_LINKING
    D_SACCHARIDE_14_AND_16_LINKING = D_SACCHARIDE_14_and_16_LINKING
    DNA_OH_3_PRIME_TERMINUS = "DNA OH 3 PRIME TERMINUS"
    DNA_OH_5_PRIME_TERMINUS = "DNA OH 5 PRIME TERMINUS"
    DNA_LINKING = "DNA LINKING"
    L_PEPTIDE_COOH_CARBOXY_TERMINUS = "L-PEPTIDE COOH CARBOXY TERMINUS"
    L_PEPTIDE_NH3_AMINO_TERMINUS = "L-PEPTIDE NH3 AMINO TERMINUS"
    L_PEPTIDE_LINKING = "L-PEPTIDE LINKING"
    L_SACCHARIDE = "L-SACCHARIDE"
    # Spelling fixed: the value used to read "L-SACCHARDIE ...".
    L_SACCHARIDE_14_AND_14_LINKING = "L-SACCHARIDE 1,4 AND 1,4 LINKING"
    L_SACCHARIDE_14_AND_16_LINKING = "L-SACCHARIDE 1,4 AND 1,6 LINKING"
    PEPTIDE_LINKING = "PEPTIDE LINKING"
    RNA_OH_3_PRIME_TERMINUS = "RNA OH 3 PRIME TERMINUS"
    RNA_OH_5_PRIME_TERMINUS = "RNA OH 5 PRIME TERMINUS"
    RNA_LINKING = "RNA LINKING"
    NON_POLYMER = "NON-POLYMER"
    OTHER = "OTHER"
    SACCHARIDE = "SACCHARIDE"

    def __init__(self, monomer_type, exclusive=False):
        if isinstance(monomer_type, str):
            # Comma-separated convenience form, e.g. "DNA LINKING,RNA LINKING".
            monomer_type = [name.strip() for name in monomer_type.split(',')]

        self.exclusive = exclusive
        self.monomer_type = monomer_type

    def __call__(self, t):
        '''Apply the filter to one entry.

        Parameters
        ----------
        t : tuple
            Entry whose second element (``t[1]``) is the structure. The
            structure's ``chains_per_model``, ``groups_per_chain``,
            ``group_type_list``, ``group_list`` and ``entity_list``
            attributes are read.

        Returns
        -------
        bool
            True if the entry passes the filter, False otherwise.
        '''
        structure = t[1]

        # A structure without any model has no chains to test.
        if len(structure.chains_per_model) == 0:
            return False

        # Map each chain index to its entity type once instead of scanning
        # the entity list for every chain. If a chain is listed by several
        # entities, the first entity wins.
        chain_types = {}
        for entity in structure.entity_list:
            for chain_index in entity['chainIndexList']:
                chain_types.setdefault(chain_index, entity['type'])

        # Only the chains of the first model are checked.
        num_chains = structure.chains_per_model[0]
        group_counter = 0
        polymer_matched = False

        for i in range(num_chains):
            # Groups are stored consecutively, chain after chain, so the
            # counter has to advance past every chain, including skipped ones.
            first_group = group_counter
            group_counter += structure.groups_per_chain[i]

            # Chains that belong to no entity are treated as non-polymer.
            if chain_types.get(i) != "polymer":
                continue

            match = all(
                structure.group_list[structure.group_type_list[g]]['chemCompType']
                in self.monomer_type
                for g in range(first_group, group_counter)
            )

            if self.exclusive:
                if not match:
                    # A single mismatching polymer chain rejects the entry.
                    return False
                polymer_matched = True
            elif match:
                # One matching polymer chain is enough.
                return True

        return polymer_matched