Source code for mmtfPyspark.webfilters.advancedQuery

#!/usr/bin/env python
'''advancedQuery.py

This filter runs an RCSB PDB Advanced Search web service using an XML query
description.

References
----------
- `Advanced Search Query <https://www.rcsb.org/pdb/staticHelp.do?p=help/advancedSearch.html>`_

Examples
--------
Find PDB entries that contain the word "mutant" in the structure title:

>>> query = "<orgPdbQuery>" + \
...         "<queryType>org.pdb.query.simple.StructTitleQuery</queryType>" + \
...         "<struct.title.comparator>contains</struct.title.comparator>" + \
...         "<struct.title.value>mutant</struct.title.value>" + \
... "</orgPdbQuery>"
>>> pdb = pdb.filter(AdvancedQuery(query))

'''
__author__ = "Mars (Shih-Cheng) Huang"
__maintainer__ = "Mars (Shih-Cheng) Huang"
__email__ = "marshuang80@gmail.com"
__version__ = "0.2.0"
__status__ = "Done"

from mmtfPyspark.webservices.advancedQueryService import post_query


class AdvancedQuery(object):
    '''Filter entries using the RCSB PDB Advanced Search web service.

    The XML query is posted once, when the filter is constructed; calling the
    instance afterwards only compares identifiers against the cached result.

    Parameters
    ----------
    xmlQuery : str
        query in RCSB PDB XML format

    Attributes
    ----------
    entityLevel : bool
        True when the first returned identifier contains ":". Such
        identifiers are compared against "<structureId>:<entityId>" strings
        built per chain; otherwise the key of the record is compared as is.
    structureIds : list
        de-duplicated identifiers returned by the web service
    exclusive : bool
        False by default. When False one matching chain accepts the record;
        when True every chain of the first model must match.
    '''

    def __init__(self, xmlQuery):
        results = post_query(xmlQuery)

        # Only the first hit is inspected to decide between structure-level
        # and entity-level matching.
        self.entityLevel = (len(results) > 0) and (":" in results[0])
        self.structureIds = list(set(results))
        self.exclusive = False

    def __call__(self, t):
        '''Decide whether a record passes the filter.

        Parameters
        ----------
        t : tuple
            ``t[0]`` is the identifier used as lookup key and ``t[1]`` the
            structure object (presumably a (structureId, MMTF structure)
            pair from an RDD -- confirm against the caller).

        Returns
        -------
        bool
            True if the record matches the query result, False otherwise.
            A structure with no chains in its first model never matches.
        '''
        key, structure = t[0], t[1]
        num_chains = structure.chains_per_model[0]

        if num_chains <= 0:
            return False

        if not self.entityLevel:
            # The looked-up ID does not depend on the chain, so a single
            # membership test replaces the per-chain loop.
            return key in self.structureIds

        chain_to_entity = self._get_chain_to_entity_index(structure)

        # Lazy generator: any()/all() stop at the first decisive chain.
        matches = (
            self._get_structure_entity_id(
                structure, key, chain_to_entity[chain_index])
            in self.structureIds
            for chain_index in range(num_chains)
        )

        if self.exclusive:
            return all(matches)
        return any(matches)

    def _get_structure_entity_id(self, structure, origStructureId, origEntityId):
        '''Build the "<structureId>:<entityId>" identifier for one chain.

        Parameters
        ----------
        structure : object
            structure whose ``structure_id`` may carry a ".<entityId>" suffix
        origStructureId : str
            key of the record; anything from the first "." on is ignored
        origEntityId : int
            zero-based entity index, used (plus one) when ``structure_id``
            has no "." suffix

        Returns
        -------
        str
            identifier in the form "<structureId>:<entityId>"

        Raises
        ------
        ValueError
            if the structure ID of the key and of the structure disagree
        '''
        key_structure_id = origStructureId.partition(".")[0]

        # A missing or non-string structure_id is treated like an ID without
        # an entity suffix, i.e. the key-based fallback below is used.
        full_id = getattr(structure, "structure_id", None)

        if isinstance(full_id, str) and "." in full_id:
            value_structure_id = full_id.partition(".")[0]

            # Raised outside of any try block so that the mismatch is
            # actually reported instead of being silently discarded.
            if key_structure_id != value_structure_id:
                raise ValueError("Structure mismatch: key vs value: %s vs. %s" %
                                 (key_structure_id, value_structure_id))

            # The entity ID is whatever follows the last ".".
            entity_id = full_id.rpartition(".")[2]
            return value_structure_id + ":" + entity_id

        return key_structure_id + ":" + str(origEntityId + 1)

    def _get_chain_to_entity_index(self, structure):
        '''Map every chain index to the index of the entity it belongs to.

        Parameters
        ----------
        structure : object
            structure providing ``num_chains`` and ``entity_list``; each
            entity is a mapping with a 'chainIndexList' entry

        Returns
        -------
        list
            list of length ``structure.num_chains``; chains not listed by any
            entity keep the default entity index 0
        '''
        entity_chain_index = [0] * structure.num_chains

        for entity_index, entity in enumerate(structure.entity_list):
            for chain_index in entity['chainIndexList']:
                entity_chain_index[chain_index] = entity_index

        return entity_chain_index