1
2
3
4
5
6 from rdkit import Chem
7 from rdkit.VLib.Filter import FilterNode
8
9
11 """ filter out molecules matching one or more SMARTS patterns
12
13 There is a count associated with each pattern. Molecules are
14 allowed to match the pattern up to this number of times.
15
16 Assumptions:
17
18 - inputs are molecules
19
20
21 Sample Usage:
22 >>> smis = ['C1CCC1','C1CCC1C=O','CCCC','CCC=O','CC(=O)C','CCN','NCCN','NCC=O']
23 >>> mols = [Chem.MolFromSmiles(x) for x in smis]
24 >>> from rdkit.VLib.Supply import SupplyNode
25 >>> suppl = SupplyNode(contents=mols)
26 >>> ms = [x for x in suppl]
27 >>> len(ms)
28 8
29
30 We can pass in SMARTS strings:
31 >>> smas = ['C=O','CN']
32 >>> counts = [1,2]
33 >>> filt = SmartsFilter(patterns=smas,counts=counts)
34 >>> filt.AddParent(suppl)
35 >>> ms = [x for x in filt]
36 >>> len(ms)
37 5
38
39 Alternatively, we can pass in molecule objects:
40 >>> mols =[Chem.MolFromSmarts(x) for x in smas]
41 >>> counts = [1,2]
42 >>> filt.Destroy()
43 >>> filt = SmartsFilter(patterns=mols,counts=counts)
44 >>> filt.AddParent(suppl)
45 >>> ms = [x for x in filt]
46 >>> len(ms)
47 5
48
49 Negation does what you'd expect:
50 >>> filt.SetNegate(1)
51 >>> ms = [x for x in filt]
52 >>> len(ms)
53 3
54
55
56 """
57
def __init__(self, patterns=[], counts=[], **kwargs):
    """Pair every SMARTS pattern with its match count and store the result.

    Arguments:
      - patterns: sized sequence whose items are SMARTS strings (str or
        bytes) or ready-made query objects. Strings are compiled with
        Chem.MolFromSmarts; anything else is stored unchanged.
      - counts: optional sequence with one count per pattern. When it is
        empty every pattern gets a count of 1.

    Raises:
      - ValueError if counts is non-empty and its length differs from the
        length of patterns, or if a SMARTS string cannot be compiled.

    On success self._patterns holds a tuple of (query, count) pairs, in
    the same order as patterns.
    """
    # The list defaults are only read below, never mutated, so sharing them
    # between calls is harmless; they are kept to leave the signature as is.
    # NOTE(review): **kwargs is not consumed by any line visible in this
    # block; presumably it is meant for a parent-class initialiser - confirm.
    nPatts = len(patterns)
    nCounts = len(counts)
    if nCounts and nCounts != nPatts:
        raise ValueError('if counts is specified, it must match patterns in length')
    if not nCounts:
        counts = [1] * nPatts

    targets = []
    for patt, count in zip(patterns, counts):
        # isinstance (rather than an exact type comparison) also routes
        # subclasses of str/bytes through the SMARTS parser.
        if isinstance(patt, (str, bytes)):
            query = Chem.MolFromSmarts(patt)
            if not query:
                raise ValueError('bad smarts: %s' % (patt))
            patt = query
        targets.append((patt, count))
    self._patterns = tuple(targets)
79
# Per-molecule test against the stored (pattern, count) pairs.
# NOTE(review): the `def` line introducing this body is not present in this
# excerpt, so the method name and signature cannot be stated here; `self` and
# `cmpd` are presumably its parameters - confirm against the full file.
# NOTE(review): indentation is lost in this excerpt; the reading below assumes
# `res = True` and `break` both sit inside the `if` - confirm.
# Under that reading the result is True as soon as one pattern has at least
# `count` substructure matches in `cmpd`, and False when no pattern does.
81 res = False
82 for patt, count in self._patterns:
# One entry per substructure match of `patt` reported by `cmpd`.
83 ms = cmpd.GetSubstructMatches(patt)
84 nMatches = len(ms)
85 if nMatches >= count:
86
87 res = True
# A single qualifying pattern decides the outcome; remaining ones are skipped.
88 break
89 return res
90
91
92
93
94
95
def _runDoctests(verbose=None):
    """Run the doctests and exit the process with the failure count.

    Arguments:
      - verbose: forwarded unchanged to doctest.testmod.

    doctest.testmod is called without a module argument, so it examines
    the `__main__` module, which is this file when it is run as a script.
    The function does not return: sys.exit is called with the number of
    failed examples, so an exit status of 0 means every example passed.
    """
    # Imported locally so that importing this module for normal use does
    # not load the test machinery.
    import doctest
    import sys

    failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
    sys.exit(failed)


if __name__ == '__main__':
    _runDoctests()
105