Package rdkit :: Package VLib :: Package NodeLib :: Module SmartsMolFilter
[hide private]
[frames] | no frames]

Source Code for Module rdkit.VLib.NodeLib.SmartsMolFilter

  1  #  $Id$ 
  2  # 
  3  #  Copyright (C) 2003 Rational Discovery LLC 
  4  #     All Rights Reserved 
  5  # 
  6  from rdkit import Chem 
  7  from rdkit.VLib.Filter import FilterNode 
  8   
  9   
class SmartsFilter(FilterNode):
    """ filter node that tests molecules against one or more SMARTS patterns

    There is a count associated with each pattern. A molecule is flagged
    (``filter()`` returns True) as soon as any one pattern matches it at
    least that many times. In the sample usage below, 5 of the 8 input
    molecules reach a threshold and those are the ones the node yields;
    negating the node yields the remaining 3 instead.

    Assumptions:

      - inputs are molecules


    Sample Usage:
      >>> smis = ['C1CCC1','C1CCC1C=O','CCCC','CCC=O','CC(=O)C','CCN','NCCN','NCC=O']
      >>> mols = [Chem.MolFromSmiles(x) for x in smis]
      >>> from rdkit.VLib.Supply import SupplyNode
      >>> suppl = SupplyNode(contents=mols)
      >>> ms = [x for x in suppl]
      >>> len(ms)
      8

      We can pass in SMARTS strings:
      >>> smas = ['C=O','CN']
      >>> counts = [1,2]
      >>> filt = SmartsFilter(patterns=smas,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

      Alternatively, we can pass in molecule objects:
      >>> mols =[Chem.MolFromSmarts(x) for x in smas]
      >>> counts = [1,2]
      >>> filt.Destroy()
      >>> filt = SmartsFilter(patterns=mols,counts=counts)
      >>> filt.AddParent(suppl)
      >>> ms = [x for x in filt]
      >>> len(ms)
      5

      Negation does what you'd expect:
      >>> filt.SetNegate(1)
      >>> ms = [x for x in filt]
      >>> len(ms)
      3


    """

    def __init__(self, patterns=None, counts=None, **kwargs):
        """ sets up the node and compiles the patterns

        Arguments:

          - patterns: sequence of SMARTS strings (str or bytes) and/or
            already-built query molecules; omitted/None means no patterns

          - counts: optional sequence of per-pattern match thresholds; when
            given and non-empty it must be as long as patterns, otherwise
            every pattern gets a threshold of 1

          - any other keyword arguments are forwarded to FilterNode.__init__

        """
        FilterNode.__init__(self, func=self.filter, **kwargs)
        self._initPatterns(patterns, counts)

    def _initPatterns(self, patterns, counts):
        """ builds the (query molecule, count) pairs stored in self._patterns

        String patterns are converted with Chem.MolFromSmarts; anything else
        is stored as provided.

        Raises ValueError if a non-empty counts differs in length from
        patterns, or if a SMARTS string cannot be parsed.

        """
        # None stands in for "not provided" so that no mutable default
        # object is shared between calls
        if patterns is None:
            patterns = []
        if counts is None:
            counts = []

        nPatts = len(patterns)
        if len(counts) and len(counts) != nPatts:
            raise ValueError('if counts is specified, it must match patterns in length')
        if not len(counts):
            counts = [1] * nPatts

        targets = []
        for p, c in zip(patterns, counts):
            # isinstance (rather than an exact type test) so that subclasses
            # of str/bytes are parsed as SMARTS too
            if isinstance(p, (str, bytes)):
                m = Chem.MolFromSmarts(p)
                if not m:
                    raise ValueError('bad smarts: %s' % (p))
                p = m
            targets.append((p, c))
        self._patterns = tuple(targets)

    def filter(self, cmpd):
        """ returns True if any stored pattern matches cmpd at least as many
        times as that pattern's count, False otherwise

        """
        # this query is an or: any() stops at the first pattern that
        # reaches its threshold
        return any(
            len(cmpd.GetSubstructMatches(patt)) >= count
            for patt, count in self._patterns
        )
90 91 92 # ------------------------------------ 93 # 94 # doctest boilerplate 95 #
def _runDoctests(verbose=None):  # pragma: nocover
    """ runs doctest.testmod() with ELLIPSIS matching enabled and exits the
    interpreter using the number of failed examples as the exit status

    """
    import doctest
    import sys

    results = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    # first element of the result pair is the failure count
    sys.exit(results[0])


if __name__ == '__main__':  # pragma: nocover
    _runDoctests()