1
2
3
4
5
6
7
8
9
10 from __future__ import print_function
11
12 import math
13 import sys
14 import time
15
16 import numpy
17
18 from rdkit import Chem
19 from rdkit import RDLogger as logging
20 from rdkit.Chem import ChemicalFeatures
21 from rdkit.Chem import ChemicalForceFields
22 from rdkit.Chem import rdDistGeom as MolDG
23 from rdkit.Chem.Pharm3D import ExcludedVolume
24 import rdkit.DistanceGeometry as DG
25 from rdkit.ML.Data import Stats
26
27 _times = {}
28
29 logger = logging.logger()
30 defaultFeatLength = 2.0
31
32
34 """ returns a list of the heavy-atom neighbors of the
35 atom passed in:
36
37 >>> m = Chem.MolFromSmiles('CCO')
38 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0))
39 >>> len(l)
40 1
41 >>> isinstance(l[0],Chem.Atom)
42 True
43 >>> l[0].GetIdx()
44 1
45
46 >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(1))
47 >>> len(l)
48 2
49 >>> l[0].GetIdx()
50 0
51 >>> l[1].GetIdx()
52 2
53
54 """
55 res = []
56 for nbr in atom.GetNeighbors():
57 if nbr.GetAtomicNum() != 1:
58 res.append(nbr)
59 return res
60
61
63 """ Adds an entry at the end of the bounds matrix for a point at
64 the center of a multi-point feature
65
66 returns a 2-tuple:
67 new bounds mat
68 index of point added
69
70 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
71 >>> match = [0,1,2]
72 >>> bm,idx = ReplaceGroup(match,boundsMat,slop=0.0)
73
74 the index is at the end:
75 >>> idx == 3
76 True
77
78 and the matrix is one bigger:
79 >>> bm.shape == (4, 4)
80 True
81
82 but the original bounds mat is not altered:
83 >>> boundsMat.shape == (3, 3)
84 True
85
86
87 We make the assumption that the points of the
88 feature form a regular polygon, are listed in order
89 (i.e. pt 0 is a neighbor to pt 1 and pt N-1)
90 and that the replacement point goes at the center:
91 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
92 0.577, 0.577, 0.577, 0.000
93 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
94 1.155, 1.155, 1.155, 0.000
95
96 The slop argument (default = 0.01) is fractional:
97 >>> bm,idx = ReplaceGroup(match,boundsMat)
98 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
99 0.572, 0.572, 0.572, 0.000
100 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
101 1.166, 1.166, 1.166, 0.000
102
103
104 """
105 maxVal = -1000.0
106 minVal = 1e8
107 nPts = len(match)
108 for i in range(nPts):
109 idx0 = match[i]
110 if i < nPts - 1:
111 idx1 = match[i + 1]
112 else:
113 idx1 = match[0]
114 if idx1 < idx0:
115 idx0, idx1 = idx1, idx0
116 minVal = min(minVal, bounds[idx1, idx0])
117 maxVal = max(maxVal, bounds[idx0, idx1])
118 maxVal *= (1 + slop)
119 minVal *= (1 - slop)
120
121 scaleFact = 1.0 / (2.0 * math.sin(math.pi / nPts))
122 minVal *= scaleFact
123 maxVal *= scaleFact
124
125 replaceIdx = bounds.shape[0]
126 if not useDirs:
127 bm = numpy.zeros((bounds.shape[0] + 1, bounds.shape[1] + 1), numpy.float)
128 else:
129 bm = numpy.zeros((bounds.shape[0] + 2, bounds.shape[1] + 2), numpy.float)
130 bm[0:bounds.shape[0], 0:bounds.shape[1]] = bounds
131 bm[:replaceIdx, replaceIdx] = 1000.
132
133 if useDirs:
134 bm[:replaceIdx + 1, replaceIdx + 1] = 1000.
135
136 bm[replaceIdx, replaceIdx + 1] = dirLength + slop
137 bm[replaceIdx + 1, replaceIdx] = dirLength - slop
138
139 for idx1 in match:
140 bm[idx1, replaceIdx] = maxVal
141 bm[replaceIdx, idx1] = minVal
142 if useDirs:
143
144 bm[idx1, replaceIdx + 1] = numpy.sqrt(bm[replaceIdx, replaceIdx + 1] ** 2 + maxVal ** 2)
145 bm[replaceIdx + 1, idx1] = numpy.sqrt(bm[replaceIdx + 1, replaceIdx] ** 2 + minVal ** 2)
146 return bm, replaceIdx
147
148
149 -def EmbedMol(mol, bm, atomMatch=None, weight=2.0, randomSeed=-1, excludedVolumes=None):
150 """ Generates an embedding for a molecule based on a bounds matrix and adds
151 a conformer (id 0) to the molecule
152
153 if the optional argument atomMatch is provided, it will be used to provide
154 supplemental weights for the embedding routine (used in the optimization
155 phase to ensure that the resulting geometry really does satisfy the
156 pharmacophore).
157
158 if the excludedVolumes is provided, it should be a sequence of
159 ExcludedVolume objects
160
161 >>> m = Chem.MolFromSmiles('c1ccccc1C')
162 >>> bounds = MolDG.GetMoleculeBoundsMatrix(m)
163 >>> bounds.shape == (7, 7)
164 True
165 >>> m.GetNumConformers()
166 0
167 >>> EmbedMol(m,bounds,randomSeed=23)
168 >>> m.GetNumConformers()
169 1
170
171
172 """
173 nAts = mol.GetNumAtoms()
174 weights = []
175 if atomMatch:
176 for i in range(len(atomMatch)):
177 for j in range(i + 1, len(atomMatch)):
178 weights.append((i, j, weight))
179 if excludedVolumes:
180 for vol in excludedVolumes:
181 idx = vol.index
182
183 for i in range(nAts):
184 weights.append((i, idx, weight))
185 coords = DG.EmbedBoundsMatrix(bm, weights=weights, numZeroFail=1, randomSeed=randomSeed)
186
187
188
189 conf = Chem.Conformer(nAts)
190 conf.SetId(0)
191 for i in range(nAts):
192 conf.SetAtomPosition(i, list(coords[i]))
193 if excludedVolumes:
194 for vol in excludedVolumes:
195 vol.pos = numpy.array(coords[vol.index])
196
197 print(' % 7.4f % 7.4f % 7.4f Ar 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(coords[-1]),
198 file=sys.stderr)
199 mol.AddConformer(conf)
200
201
203 """ Adds a set of excluded volumes to the bounds matrix
204 and returns the new matrix
205
206 excludedVolumes is a list of ExcludedVolume objects
207
208
209 >>> boundsMat = numpy.array([[0.0,2.0,2.0],[1.0,0.0,2.0],[1.0,1.0,0.0]])
210 >>> ev1 = ExcludedVolume.ExcludedVolume(([(0,),0.5,1.0],),exclusionDist=1.5)
211 >>> bm = AddExcludedVolumes(boundsMat,(ev1,))
212
213 the results matrix is one bigger:
214 >>> bm.shape == (4, 4)
215 True
216
217 and the original bounds mat is not altered:
218 >>> boundsMat.shape == (3, 3)
219 True
220
221 >>> print(', '.join(['%.3f'%x for x in bm[-1]]))
222 0.500, 1.500, 1.500, 0.000
223 >>> print(', '.join(['%.3f'%x for x in bm[:,-1]]))
224 1.000, 3.000, 3.000, 0.000
225
226 """
227 oDim = bm.shape[0]
228 dim = oDim + len(excludedVolumes)
229 res = numpy.zeros((dim, dim), numpy.float)
230 res[:oDim, :oDim] = bm
231 for i, vol in enumerate(excludedVolumes):
232 bmIdx = oDim + i
233 vol.index = bmIdx
234
235
236 res[bmIdx, :bmIdx] = vol.exclusionDist
237 res[:bmIdx, bmIdx] = 1000.0
238
239
240 for indices, minV, maxV in vol.featInfo:
241 for index in indices:
242 try:
243 res[bmIdx, index] = minV
244 res[index, bmIdx] = maxV
245 except IndexError:
246 logger.error('BAD INDEX: res[%d,%d], shape is %s' % (bmIdx, index, str(res.shape)))
247 raise IndexError
248
249
250 for j in range(bmIdx + 1, dim):
251 res[bmIdx, j:dim] = 0
252 res[j:dim, bmIdx] = 1000
253
254 if smoothIt:
255 DG.DoTriangleSmoothing(res)
256 return res
257
258
261 """ loops over a distance bounds matrix and replaces the elements
262 that are altered by a pharmacophore
263
264 **NOTE** this returns the resulting bounds matrix, but it may also
265 alter the input matrix
266
267 atomMatch is a sequence of sequences containing atom indices
268 for each of the pharmacophore's features.
269
270 >>> from rdkit import Geometry
271 >>> from rdkit.Chem.Pharm3D import Pharmacophore
272 >>> feats = [
273 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
274 ... Geometry.Point3D(0.0, 0.0, 0.0)),
275 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
276 ... Geometry.Point3D(2.65, 0.0, 0.0)),
277 ... ]
278 >>> pcophore=Pharmacophore.Pharmacophore(feats)
279 >>> pcophore.setLowerBound(0,1, 1.0)
280 >>> pcophore.setUpperBound(0,1, 2.0)
281
282 >>> boundsMat = numpy.array([[0.0,3.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
283 >>> atomMatch = ((0,),(1,))
284 >>> bm = UpdatePharmacophoreBounds(boundsMat,atomMatch,pcophore)
285
286
287 In this case, there are no multi-atom features, so the result matrix
288 is the same as the input:
289 >>> bm is boundsMat
290 True
291
292 this means, of course, that the input boundsMat is altered:
293 >>> print(', '.join(['%.3f'%x for x in boundsMat[0]]))
294 0.000, 2.000, 3.000
295 >>> print(', '.join(['%.3f'%x for x in boundsMat[1]]))
296 1.000, 0.000, 3.000
297 >>> print(', '.join(['%.3f'%x for x in boundsMat[2]]))
298 2.000, 2.000, 0.000
299
300 """
301 replaceMap = {}
302 for i, matchI in enumerate(atomMatch):
303 if len(matchI) > 1:
304 bm, replaceIdx = ReplaceGroup(matchI, bm, useDirs=useDirs)
305 replaceMap[i] = replaceIdx
306
307 for i, matchI in enumerate(atomMatch):
308 mi = replaceMap.get(i, matchI[0])
309 for j in range(i + 1, len(atomMatch)):
310 mj = replaceMap.get(j, atomMatch[j][0])
311 if mi < mj:
312 idx0, idx1 = mi, mj
313 else:
314 idx0, idx1 = mj, mi
315 bm[idx0, idx1] = pcophore.getUpperBound(i, j)
316 bm[idx1, idx0] = pcophore.getLowerBound(i, j)
317
318 return bm
319
320
321 -def EmbedPharmacophore(mol, atomMatch, pcophore, randomSeed=-1, count=10, smoothFirst=True,
322 silent=False, bounds=None, excludedVolumes=None, targetNumber=-1,
323 useDirs=False):
324 """ Generates one or more embeddings for a molecule that satisfy a pharmacophore
325
326 atomMatch is a sequence of sequences containing atom indices
327 for each of the pharmacophore's features.
328
329 - count: is the maximum number of attempts to make a generating an embedding
330 - smoothFirst: toggles triangle smoothing of the molecular bounds matix
331 - bounds: if provided, should be the molecular bounds matrix. If this isn't
332 provided, the matrix will be generated.
333 - targetNumber: if this number is positive, it provides a maximum number
334 of embeddings to generate (i.e. we'll have count attempts to generate
335 targetNumber embeddings).
336
337 returns: a 3 tuple:
338 1) the molecular bounds matrix adjusted for the pharmacophore
339 2) a list of embeddings (molecules with a single conformer)
340 3) the number of failed attempts at embedding
341
342 >>> from rdkit import Geometry
343 >>> from rdkit.Chem.Pharm3D import Pharmacophore
344 >>> m = Chem.MolFromSmiles('OCCN')
345 >>> feats = [
346 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
347 ... Geometry.Point3D(0.0, 0.0, 0.0)),
348 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
349 ... Geometry.Point3D(2.65, 0.0, 0.0)),
350 ... ]
351 >>> pcophore=Pharmacophore.Pharmacophore(feats)
352 >>> pcophore.setLowerBound(0,1, 2.5)
353 >>> pcophore.setUpperBound(0,1, 3.5)
354 >>> atomMatch = ((0,),(3,))
355
356 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
357 >>> len(embeds)
358 10
359 >>> nFail
360 0
361
362 Set up a case that can't succeed:
363 >>> pcophore=Pharmacophore.Pharmacophore(feats)
364 >>> pcophore.setLowerBound(0,1, 2.0)
365 >>> pcophore.setUpperBound(0,1, 2.1)
366 >>> atomMatch = ((0,),(3,))
367
368 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
369 >>> len(embeds)
370 0
371 >>> nFail
372 10
373
374 """
375 global _times
376 if not hasattr(mol, '_chiralCenters'):
377 mol._chiralCenters = Chem.FindMolChiralCenters(mol)
378
379 if bounds is None:
380 bounds = MolDG.GetMoleculeBoundsMatrix(mol)
381 if smoothFirst:
382 DG.DoTriangleSmoothing(bounds)
383
384 bm = bounds.copy()
385
386
387
388
389
390
391 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol)
392
393 if excludedVolumes:
394 bm = AddExcludedVolumes(bm, excludedVolumes, smoothIt=False)
395
396 if not DG.DoTriangleSmoothing(bm):
397 raise ValueError("could not smooth bounds matrix")
398
399
400
401
402
403
404
405 if targetNumber <= 0:
406 targetNumber = count
407 nFailed = 0
408 res = []
409 for i in range(count):
410 tmpM = bm[:, :]
411 m2 = Chem.Mol(mol)
412 t1 = time.time()
413 try:
414 if randomSeed <= 0:
415 seed = i * 10 + 1
416 else:
417 seed = i * 10 + randomSeed
418 EmbedMol(m2, tmpM, atomMatch, randomSeed=seed, excludedVolumes=excludedVolumes)
419 except ValueError:
420 if not silent:
421 logger.info('Embed failed')
422 nFailed += 1
423 else:
424 t2 = time.time()
425 _times['embed'] = _times.get('embed', 0) + t2 - t1
426 keepIt = True
427 for idx, stereo in mol._chiralCenters:
428 if stereo in ('R', 'S'):
429 vol = ComputeChiralVolume(m2, idx)
430 if (stereo == 'R' and vol >= 0) or (stereo == 'S' and vol <= 0):
431 keepIt = False
432 break
433 if keepIt:
434 res.append(m2)
435 else:
436 logger.debug('Removed embedding due to chiral constraints.')
437 if len(res) == targetNumber:
438 break
439 return bm, res, nFailed
440
441
443 """ provides an OS independent way of detecting NaNs
444 This is intended to be used with values returned from the C++
445 side of things.
446
447 We can't actually test this from Python (which traps
448 zero division errors), but it would work something like
449 this if we could:
450 >>> isNaN(0)
451 False
452
453 #>>> isNan(1/0)
454 #True
455
456 """
457 if v != v and sys.platform == 'win32':
458 return True
459 elif v == 0 and v == 1 and sys.platform != 'win32':
460 return True
461 return False
462
463
464 -def OptimizeMol(mol, bm, atomMatches=None, excludedVolumes=None, forceConstant=1200.0, maxPasses=5,
465 verbose=False):
466 """ carries out a UFF optimization for a molecule optionally subject
467 to the constraints in a bounds matrix
468
469 - atomMatches, if provided, is a sequence of sequences
470 - forceConstant is the force constant of the spring used to enforce
471 the constraints
472
473 returns a 2-tuple:
474 1) the energy of the initial conformation
475 2) the energy post-embedding
476 NOTE that these energies include the energies of the constraints
477
478
479
480 >>> from rdkit import Geometry
481 >>> from rdkit.Chem.Pharm3D import Pharmacophore
482 >>> m = Chem.MolFromSmiles('OCCN')
483 >>> feats = [
484 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
485 ... Geometry.Point3D(0.0, 0.0, 0.0)),
486 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
487 ... Geometry.Point3D(2.65, 0.0, 0.0)),
488 ... ]
489 >>> pcophore=Pharmacophore.Pharmacophore(feats)
490 >>> pcophore.setLowerBound(0,1, 2.5)
491 >>> pcophore.setUpperBound(0,1, 2.8)
492 >>> atomMatch = ((0,),(3,))
493 >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
494 >>> len(embeds)
495 10
496 >>> testM = embeds[0]
497
498 Do the optimization:
499 >>> e1,e2 = OptimizeMol(testM,bm,atomMatches=atomMatch)
500
501 Optimizing should have lowered the energy:
502 >>> e2 < e1
503 True
504
505 Check the constrained distance:
506 >>> conf = testM.GetConformer(0)
507 >>> p0 = conf.GetAtomPosition(0)
508 >>> p3 = conf.GetAtomPosition(3)
509 >>> d03 = p0.Distance(p3)
510 >>> d03 >= pcophore.getLowerBound(0,1)-.01
511 True
512 >>> d03 <= pcophore.getUpperBound(0,1)+.01
513 True
514
515 If we optimize without the distance constraints (provided via the atomMatches
516 argument) we're not guaranteed to get the same results, particularly in a case
517 like the current one where the pharmcophore brings the atoms uncomfortably
518 close together:
519 >>> testM = embeds[1]
520 >>> e1,e2 = OptimizeMol(testM,bm)
521 >>> e2 < e1
522 True
523 >>> conf = testM.GetConformer(0)
524 >>> p0 = conf.GetAtomPosition(0)
525 >>> p3 = conf.GetAtomPosition(3)
526 >>> d03 = p0.Distance(p3)
527 >>> d03 >= pcophore.getLowerBound(0,1)-.01
528 True
529 >>> d03 <= pcophore.getUpperBound(0,1)+.01
530 False
531
532 """
533 try:
534 ff = ChemicalForceFields.UFFGetMoleculeForceField(mol)
535 except Exception:
536 logger.info('Problems building molecular forcefield', exc_info=True)
537 return -1.0, -1.0
538
539 weights = []
540 if (atomMatches):
541 for k in range(len(atomMatches)):
542 for i in atomMatches[k]:
543 for l in range(k + 1, len(atomMatches)):
544 for j in atomMatches[l]:
545 weights.append((i, j))
546 for i, j in weights:
547 if j < i:
548 i, j = j, i
549 minV = bm[j, i]
550 maxV = bm[i, j]
551 ff.AddDistanceConstraint(i, j, minV, maxV, forceConstant)
552 if excludedVolumes:
553 nAts = mol.GetNumAtoms()
554 conf = mol.GetConformer()
555 idx = nAts
556 for exVol in excludedVolumes:
557 assert exVol.pos is not None
558 logger.debug('ff.AddExtraPoint(%.4f,%.4f,%.4f)' % (exVol.pos[0], exVol.pos[1], exVol.pos[2]))
559 ff.AddExtraPoint(exVol.pos[0], exVol.pos[1], exVol.pos[2], True)
560 indices = []
561 for localIndices, _, _ in exVol.featInfo:
562 indices += list(localIndices)
563 for i in range(nAts):
564 v = numpy.array(conf.GetAtomPosition(i)) - numpy.array(exVol.pos)
565 d = numpy.sqrt(numpy.dot(v, v))
566 if i not in indices:
567 if d < 5.0:
568 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%d,%.0f)' %
569 (i, idx, exVol.exclusionDist, 1000, forceConstant))
570 ff.AddDistanceConstraint(i, idx, exVol.exclusionDist, 1000, forceConstant)
571
572 else:
573 logger.debug('ff.AddDistanceConstraint(%d,%d,%.3f,%.3f,%.0f)' %
574 (i, idx, bm[exVol.index, i], bm[i, exVol.index], forceConstant))
575 ff.AddDistanceConstraint(i, idx, bm[exVol.index, i], bm[i, exVol.index], forceConstant)
576 idx += 1
577 ff.Initialize()
578 e1 = ff.CalcEnergy()
579 if isNaN(e1):
580 raise ValueError('bogus energy')
581
582 if verbose:
583 print(Chem.MolToMolBlock(mol))
584 for i, _ in enumerate(excludedVolumes):
585 pos = ff.GetExtraPointPos(i)
586 print(' % 7.4f % 7.4f % 7.4f As 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(pos),
587 file=sys.stderr)
588 needsMore = ff.Minimize()
589 nPasses = 0
590 while needsMore and nPasses < maxPasses:
591 needsMore = ff.Minimize()
592 nPasses += 1
593 e2 = ff.CalcEnergy()
594 if isNaN(e2):
595 raise ValueError('bogus energy')
596
597 if verbose:
598 print('--------')
599 print(Chem.MolToMolBlock(mol))
600 for i, _ in enumerate(excludedVolumes):
601 pos = ff.GetExtraPointPos(i)
602 print(' % 7.4f % 7.4f % 7.4f Sb 0 0 0 0 0 0 0 0 0 0 0 0' % tuple(pos),
603 file=sys.stderr)
604 ff = None
605 return e1, e2
606
607
608 -def EmbedOne(mol, name, match, pcophore, count=1, silent=0, **kwargs):
609 """ generates statistics for a molecule's embeddings
610
611 Four energies are computed for each embedding:
612 1) E1: the energy (with constraints) of the initial embedding
613 2) E2: the energy (with constraints) of the optimized embedding
614 3) E3: the energy (no constraints) the geometry for E2
615 4) E4: the energy (no constraints) of the optimized free-molecule
616 (starting from the E3 geometry)
617
618 Returns a 9-tuple:
619 1) the mean value of E1
620 2) the sample standard deviation of E1
621 3) the mean value of E2
622 4) the sample standard deviation of E2
623 5) the mean value of E3
624 6) the sample standard deviation of E3
625 7) the mean value of E4
626 8) the sample standard deviation of E4
627 9) The number of embeddings that failed
628
629 """
630 global _times
631 atomMatch = [list(x.GetAtomIds()) for x in match]
632 bm, ms, nFailed = EmbedPharmacophore(mol, atomMatch, pcophore, count=count, silent=silent,
633 **kwargs)
634 e1s = []
635 e2s = []
636 e3s = []
637 e4s = []
638 d12s = []
639 d23s = []
640 d34s = []
641 for m in ms:
642 t1 = time.time()
643 try:
644 e1, e2 = OptimizeMol(m, bm, atomMatch)
645 except ValueError:
646 pass
647 else:
648 t2 = time.time()
649 _times['opt1'] = _times.get('opt1', 0) + t2 - t1
650
651 e1s.append(e1)
652 e2s.append(e2)
653
654 d12s.append(e1 - e2)
655 t1 = time.time()
656 try:
657 e3, e4 = OptimizeMol(m, bm)
658 except ValueError:
659 pass
660 else:
661 t2 = time.time()
662 _times['opt2'] = _times.get('opt2', 0) + t2 - t1
663 e3s.append(e3)
664 e4s.append(e4)
665 d23s.append(e2 - e3)
666 d34s.append(e3 - e4)
667 count += 1
668 try:
669 e1, e1d = Stats.MeanAndDev(e1s)
670 except Exception:
671 e1 = -1.0
672 e1d = -1.0
673 try:
674 e2, e2d = Stats.MeanAndDev(e2s)
675 except Exception:
676 e2 = -1.0
677 e2d = -1.0
678 try:
679 e3, e3d = Stats.MeanAndDev(e3s)
680 except Exception:
681 e3 = -1.0
682 e3d = -1.0
683
684 try:
685 e4, e4d = Stats.MeanAndDev(e4s)
686 except Exception:
687 e4 = -1.0
688 e4d = -1.0
689 if not silent:
690 print('%s(%d): %.2f(%.2f) -> %.2f(%.2f) : %.2f(%.2f) -> %.2f(%.2f)' %
691 (name, nFailed, e1, e1d, e2, e2d, e3, e3d, e4, e4d))
692 return e1, e1d, e2, e2d, e3, e3d, e4, e4d, nFailed
693
694
696 """ generates a list of all possible mappings of a pharmacophore to a molecule
697
698 Returns a 2-tuple:
699 1) a boolean indicating whether or not all features were found
700 2) a list, numFeatures long, of sequences of features
701
702
703 >>> import os.path
704 >>> from rdkit import Geometry, RDConfig
705 >>> from rdkit.Chem.Pharm3D import Pharmacophore
706 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef')
707 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
708 >>> activeFeats = [
709 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
710 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
711 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
712 >>> m = Chem.MolFromSmiles('FCCN')
713 >>> match,mList = MatchPharmacophoreToMol(m,featFactory,pcophore)
714 >>> match
715 True
716
717 Two feature types:
718 >>> len(mList)
719 2
720
721 The first feature type, Acceptor, has two matches:
722 >>> len(mList[0])
723 2
724 >>> mList[0][0].GetAtomIds()
725 (0,)
726 >>> mList[0][1].GetAtomIds()
727 (3,)
728
729 The first feature type, Donor, has a single match:
730 >>> len(mList[1])
731 1
732 >>> mList[1][0].GetAtomIds()
733 (3,)
734
735 """
736 return MatchFeatsToMol(mol, featFactory, pcophore.getFeatures())
737
738
740 """ **INTERNAL USE ONLY**
741
742 >>> import os.path
743 >>> from rdkit import Geometry, RDConfig, Chem
744 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef')
745 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
746 >>> activeFeats = [
747 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
748 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
749 >>> m = Chem.MolFromSmiles('FCCN')
750 >>> d =_getFeatDict(m,featFactory,activeFeats)
751 >>> sorted(list(d.keys()))
752 ['Acceptor', 'Donor']
753 >>> donors = d['Donor']
754 >>> len(donors)
755 1
756 >>> donors[0].GetAtomIds()
757 (3,)
758 >>> acceptors = d['Acceptor']
759 >>> len(acceptors)
760 2
761 >>> acceptors[0].GetAtomIds()
762 (0,)
763 >>> acceptors[1].GetAtomIds()
764 (3,)
765
766 """
767 molFeats = {}
768 for feat in features:
769 family = feat.GetFamily()
770 if family not in molFeats:
771 matches = featFactory.GetFeaturesForMol(mol, includeOnly=family)
772 molFeats[family] = matches
773 return molFeats
774
775
777 """ generates a list of all possible mappings of each feature to a molecule
778
779 Returns a 2-tuple:
780 1) a boolean indicating whether or not all features were found
781 2) a list, numFeatures long, of sequences of features
782
783
784 >>> import os.path
785 >>> from rdkit import RDConfig, Geometry
786 >>> fdefFile = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data/BaseFeatures.fdef')
787 >>> featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
788 >>> activeFeats = [
789 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
790 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
791 >>> m = Chem.MolFromSmiles('FCCN')
792 >>> match,mList = MatchFeatsToMol(m,featFactory,activeFeats)
793 >>> match
794 True
795
796 Two feature types:
797 >>> len(mList)
798 2
799
800 The first feature type, Acceptor, has two matches:
801 >>> len(mList[0])
802 2
803 >>> mList[0][0].GetAtomIds()
804 (0,)
805 >>> mList[0][1].GetAtomIds()
806 (3,)
807
808 The first feature type, Donor, has a single match:
809 >>> len(mList[1])
810 1
811 >>> mList[1][0].GetAtomIds()
812 (3,)
813
814 """
815 molFeats = _getFeatDict(mol, featFactory, features)
816 res = []
817 for feat in features:
818 matches = molFeats.get(feat.GetFamily(), [])
819 if len(matches) == 0:
820 return False, None
821 res.append(matches)
822 return True, res
823
824
826 """ This generator takes a sequence of sequences as an argument and
827 provides all combinations of the elements of the subsequences:
828
829 >>> gen = CombiEnum(((1,2),(10,20)))
830 >>> next(gen)
831 [1, 10]
832 >>> next(gen)
833 [1, 20]
834
835 >>> [x for x in CombiEnum(((1,2),(10,20)))]
836 [[1, 10], [1, 20], [2, 10], [2, 20]]
837
838 >>> [x for x in CombiEnum(((1,2),(10,20),(100,200)))]
839 [[1, 10, 100], [1, 10, 200], [1, 20, 100], [1, 20, 200], [2, 10, 100],
840 [2, 10, 200], [2, 20, 100], [2, 20, 200]]
841
842 """
843 if not len(sequence):
844 yield []
845 elif len(sequence) == 1:
846 for entry in sequence[0]:
847 yield [entry]
848 else:
849 for entry in sequence[0]:
850 for subVal in CombiEnum(sequence[1:]):
851 yield [entry] + subVal
852
853
855 """ removes rows from a bounds matrix that are
856 that are greater than a threshold value away from a set of
857 other points
858
859 returns the modfied bounds matrix
860
861 The goal of this function is to remove rows from the bounds matrix
862 that correspond to atoms that are likely to be quite far from
863 the pharmacophore we're interested in. Because the bounds smoothing
864 we eventually have to do is N^3, this can be a big win
865
866 >>> boundsMat = numpy.array([[0.0,3.0,4.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
867 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),3.5)
868 >>> bm.shape == (2, 2)
869 True
870
871 we don't touch the input matrix:
872 >>> boundsMat.shape == (3, 3)
873 True
874
875 >>> print(', '.join(['%.3f'%x for x in bm[0]]))
876 0.000, 3.000
877 >>> print(', '.join(['%.3f'%x for x in bm[1]]))
878 2.000, 0.000
879
880 if the threshold is high enough, we don't do anything:
881 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
882 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,),5.0)
883 >>> bm.shape == (3, 3)
884 True
885
886 If there's a max value that's close enough to *any* of the indices
887 we pass in, we'll keep it:
888 >>> boundsMat = numpy.array([[0.0,4.0,3.0],[2.0,0.0,3.0],[2.0,2.0,0.0]])
889 >>> bm = DownsampleBoundsMatrix(boundsMat,(0,1),3.5)
890 >>> bm.shape == (3, 3)
891 True
892
893 """
894 nPts = bm.shape[0]
895 k = numpy.zeros(nPts, numpy.int0)
896 for idx in indices:
897 k[idx] = 1
898 for i in indices:
899 row = bm[i]
900 for j in range(i + 1, nPts):
901 if not k[j] and row[j] < maxThresh:
902 k[j] = 1
903 keep = numpy.nonzero(k)[0]
904 bm2 = numpy.zeros((len(keep), len(keep)), numpy.float)
905 for i, idx in enumerate(keep):
906 row = bm[idx]
907 bm2[i] = numpy.take(row, keep)
908 return bm2
909
910
912 """
913 >>> from rdkit import Geometry
914 >>> from rdkit.Chem.Pharm3D import Pharmacophore
915 >>> feats = [
916 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
917 ... Geometry.Point3D(0.0, 0.0, 0.0)),
918 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
919 ... Geometry.Point3D(2.65, 0.0, 0.0)),
920 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1',
921 ... Geometry.Point3D(5.12, 0.908, 0.0)),
922 ... ]
923 >>> pcophore=Pharmacophore.Pharmacophore(feats)
924 >>> pcophore.setLowerBound(0,1, 1.1)
925 >>> pcophore.setUpperBound(0,1, 1.9)
926 >>> pcophore.setLowerBound(0,2, 2.1)
927 >>> pcophore.setUpperBound(0,2, 2.9)
928 >>> pcophore.setLowerBound(1,2, 2.1)
929 >>> pcophore.setUpperBound(1,2, 3.9)
930
931 >>> bounds = numpy.array([[0,2,3],[1,0,4],[2,3,0]],numpy.float)
932 >>> CoarseScreenPharmacophore(((0,),(1,)),bounds,pcophore)
933 True
934
935 >>> CoarseScreenPharmacophore(((0,),(2,)),bounds,pcophore)
936 False
937
938 >>> CoarseScreenPharmacophore(((1,),(2,)),bounds,pcophore)
939 False
940
941 >>> CoarseScreenPharmacophore(((0,),(1,),(2,)),bounds,pcophore)
942 True
943
944 >>> CoarseScreenPharmacophore(((1,),(0,),(2,)),bounds,pcophore)
945 False
946
947 >>> CoarseScreenPharmacophore(((2,),(1,),(0,)),bounds,pcophore)
948 False
949
950 # we ignore the point locations here and just use their definitions:
951 >>> feats = [
952 ... ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1',
953 ... Geometry.Point3D(0.0, 0.0, 0.0)),
954 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
955 ... Geometry.Point3D(2.65, 0.0, 0.0)),
956 ... ChemicalFeatures.FreeChemicalFeature('Aromatic', 'Aromatic1',
957 ... Geometry.Point3D(5.12, 0.908, 0.0)),
958 ... ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1',
959 ... Geometry.Point3D(2.65, 0.0, 0.0)),
960 ... ]
961 >>> pcophore=Pharmacophore.Pharmacophore(feats)
962 >>> pcophore.setLowerBound(0,1, 2.1)
963 >>> pcophore.setUpperBound(0,1, 2.9)
964 >>> pcophore.setLowerBound(0,2, 2.1)
965 >>> pcophore.setUpperBound(0,2, 2.9)
966 >>> pcophore.setLowerBound(0,3, 2.1)
967 >>> pcophore.setUpperBound(0,3, 2.9)
968 >>> pcophore.setLowerBound(1,2, 1.1)
969 >>> pcophore.setUpperBound(1,2, 1.9)
970 >>> pcophore.setLowerBound(1,3, 1.1)
971 >>> pcophore.setUpperBound(1,3, 1.9)
972 >>> pcophore.setLowerBound(2,3, 1.1)
973 >>> pcophore.setUpperBound(2,3, 1.9)
974 >>> bounds = numpy.array([[0,3,3,3],[2,0,2,2],[2,1,0,2],[2,1,1,0]],numpy.float)
975
976 >>> CoarseScreenPharmacophore(((0,),(1,),(2,),(3,)),bounds,pcophore)
977 True
978
979 >>> CoarseScreenPharmacophore(((0,),(1,),(3,),(2,)),bounds,pcophore)
980 True
981
982 >>> CoarseScreenPharmacophore(((1,),(0,),(3,),(2,)),bounds,pcophore)
983 False
984
985 """
986 for k in range(len(atomMatch)):
987 if len(atomMatch[k]) == 1:
988 for l in range(k + 1, len(atomMatch)):
989 if len(atomMatch[l]) == 1:
990 idx0 = atomMatch[k][0]
991 idx1 = atomMatch[l][0]
992 if idx1 < idx0:
993 idx0, idx1 = idx1, idx0
994 if (bounds[idx1, idx0] >= pcophore.getUpperBound(k, l) or
995 bounds[idx0, idx1] <= pcophore.getLowerBound(k, l)):
996 if verbose:
997 print('\t (%d,%d) [%d,%d] fail' % (idx1, idx0, k, l))
998 print('\t %f,%f - %f,%f' % (bounds[idx1, idx0], pcophore.getUpperBound(k, l),
999 bounds[idx0, idx1], pcophore.getLowerBound(k, l)))
1000
1001
1002
1003
1004 return False
1005 return True
1006
1007
1009 """ checks to see if a particular mapping of features onto
1010 a molecule satisfies a pharmacophore's 2D restrictions
1011
1012 >>> from rdkit import Geometry
1013 >>> from rdkit.Chem.Pharm3D import Pharmacophore
1014 >>> activeFeats = [
1015 ... ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
1016 ... ChemicalFeatures.FreeChemicalFeature('Donor',Geometry.Point3D(0.0, 0.0, 0.0))]
1017 >>> pcophore= Pharmacophore.Pharmacophore(activeFeats)
1018 >>> pcophore.setUpperBound2D(0,1,3)
1019 >>> m = Chem.MolFromSmiles('FCC(N)CN')
1020 >>> Check2DBounds(((0,),(3,)),m,pcophore)
1021 True
1022 >>> Check2DBounds(((0,),(5,)),m,pcophore)
1023 False
1024
1025 """
1026 dm = Chem.GetDistanceMatrix(mol, False, False, False)
1027 nFeats = len(atomMatch)
1028 for i in range(nFeats):
1029 for j in range(i + 1, nFeats):
1030 lowerB = pcophore._boundsMat2D[j, i]
1031 upperB = pcophore._boundsMat2D[i, j]
1032 dij = 10000
1033 for atomI in atomMatch[i]:
1034 for atomJ in atomMatch[j]:
1035 try:
1036 dij = min(dij, dm[atomI, atomJ])
1037 except IndexError:
1038 print('bad indices:', atomI, atomJ)
1039 print(' shape:', dm.shape)
1040 print(' match:', atomMatch)
1041 print(' mol:')
1042 print(Chem.MolToMolBlock(mol))
1043 raise IndexError
1044 if dij < lowerB or dij > upperB:
1045 return False
1046 return True
1047
1048
1049 -def _checkMatch(match, mol, bounds, pcophore, use2DLimits):
1050 """ **INTERNAL USE ONLY**
1051
1052 checks whether a particular atom match can be satisfied by
1053 a molecule
1054
1055 """
1056 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1057 if not atomMatch:
1058 return None
1059 elif use2DLimits:
1060 if not Check2DBounds(atomMatch, mol, pcophore):
1061 return None
1062 if not CoarseScreenPharmacophore(atomMatch, bounds, pcophore):
1063 return None
1064 return atomMatch
1065
1066
1067 -def ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=False, index=0, soFar=[]):
1068 """ Enumerates the list of atom mappings a molecule
1069 has to a particular pharmacophore.
1070 We do check distance bounds here.
1071
1072
1073 """
1074 nMatches = len(matches)
1075 if index >= nMatches:
1076 yield soFar, []
1077 elif index == nMatches - 1:
1078 for entry in matches[index]:
1079 nextStep = soFar + [entry]
1080 if index != 0:
1081 atomMatch = _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits)
1082 else:
1083 atomMatch = ChemicalFeatures.GetAtomMatch(nextStep)
1084 if atomMatch:
1085 yield soFar + [entry], atomMatch
1086 else:
1087 for entry in matches[index]:
1088 nextStep = soFar + [entry]
1089 if index != 0:
1090 atomMatch = _checkMatch(nextStep, mol, bounds, pcophore, use2DLimits)
1091 if not atomMatch:
1092 continue
1093 for val in ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=use2DLimits,
1094 index=index + 1, soFar=nextStep):
1095 if val:
1096 yield val
1097
1098
1099 -def MatchPharmacophore(matches, bounds, pcophore, useDownsampling=False, use2DLimits=False,
1100 mol=None, excludedVolumes=None, useDirs=False):
1101 """
1102
1103 if use2DLimits is set, the molecule must also be provided and topological
1104 distances will also be used to filter out matches
1105
1106 """
1107 for match, atomMatch in ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=use2DLimits):
1108 bm = bounds.copy()
1109 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol)
1110
1111 if excludedVolumes:
1112 localEvs = []
1113 for eV in excludedVolumes:
1114 featInfo = []
1115 for i, entry in enumerate(atomMatch):
1116 info = list(eV.featInfo[i])
1117 info[0] = entry
1118 featInfo.append(info)
1119 localEvs.append(ExcludedVolume.ExcludedVolume(featInfo, eV.index, eV.exclusionDist))
1120 bm = AddExcludedVolumes(bm, localEvs, smoothIt=False)
1121
1122 sz = bm.shape[0]
1123 if useDownsampling:
1124 indices = []
1125 for entry in atomMatch:
1126 indices.extend(entry)
1127 if excludedVolumes:
1128 for vol in localEvs:
1129 indices.append(vol.index)
1130 bm = DownsampleBoundsMatrix(bm, indices)
1131 if DG.DoTriangleSmoothing(bm):
1132 return 0, bm, match, (sz, bm.shape[0])
1133
1134 return 1, None, None, None
1135
1136
1137 -def GetAllPharmacophoreMatches(matches, bounds, pcophore, useDownsampling=0, progressCallback=None,
1138 use2DLimits=False, mol=None, verbose=False):
1139 res = []
1140 nDone = 0
1141 for match in CombiEnum(matches):
1142 atomMatch = ChemicalFeatures.GetAtomMatch(match)
1143 if atomMatch and use2DLimits and mol:
1144 pass2D = Check2DBounds(atomMatch, mol, pcophore)
1145 if verbose:
1146 print('..', atomMatch)
1147 print(' ..Pass2d:', pass2D)
1148 else:
1149 pass2D = True
1150 if atomMatch and pass2D and \
1151 CoarseScreenPharmacophore(atomMatch, bounds, pcophore, verbose=verbose):
1152 if verbose:
1153 print(' ..CoarseScreen: Pass')
1154
1155 bm = bounds.copy()
1156 if verbose:
1157 print('pre update:')
1158 for row in bm:
1159 print(' ', ' '.join(['% 4.2f' % x for x in row]))
1160 bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore)
1161 if verbose:
1162 print('pre downsample:')
1163 for row in bm:
1164 print(' ', ' '.join(['% 4.2f' % x for x in row]))
1165
1166 if useDownsampling:
1167 indices = []
1168 for entry in atomMatch:
1169 indices += list(entry)
1170 bm = DownsampleBoundsMatrix(bm, indices)
1171 if verbose:
1172 print('post downsample:')
1173 for row in bm:
1174 print(' ', ' '.join(['% 4.2f' % x for x in row]))
1175
1176 if DG.DoTriangleSmoothing(bm):
1177 res.append(match)
1178 elif verbose:
1179 print('cannot smooth')
1180 nDone += 1
1181 if progressCallback:
1182 progressCallback(nDone)
1183 return res
1184
1185
1187 """ Computes the chiral volume of an atom
1188
1189 We're using the chiral volume formula from Figure 7 of
1190 Blaney and Dixon, Rev. Comp. Chem. V, 299-335 (1994)
1191
1192 >>> import os.path
1193 >>> from rdkit import RDConfig
1194 >>> dataDir = os.path.join(RDConfig.RDCodeDir,'Chem/Pharm3D/test_data')
1195
1196 R configuration atoms give negative volumes:
1197 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r.mol'))
1198 >>> Chem.AssignStereochemistry(mol)
1199 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1200 'R'
1201 >>> ComputeChiralVolume(mol,1) < 0
1202 True
1203
1204 S configuration atoms give positive volumes:
1205 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s.mol'))
1206 >>> Chem.AssignStereochemistry(mol)
1207 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1208 'S'
1209 >>> ComputeChiralVolume(mol,1) > 0
1210 True
1211
1212 Non-chiral (or non-specified) atoms give zero volume:
1213 >>> ComputeChiralVolume(mol,0) == 0.0
1214 True
1215
1216 We also work on 3-coordinate atoms (with implicit Hs):
1217 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-r-3.mol'))
1218 >>> Chem.AssignStereochemistry(mol)
1219 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1220 'R'
1221 >>> ComputeChiralVolume(mol,1)<0
1222 True
1223
1224 >>> mol = Chem.MolFromMolFile(os.path.join(dataDir,'mol-s-3.mol'))
1225 >>> Chem.AssignStereochemistry(mol)
1226 >>> mol.GetAtomWithIdx(1).GetProp('_CIPCode')
1227 'S'
1228 >>> ComputeChiralVolume(mol,1)>0
1229 True
1230
1231
1232
1233 """
1234 conf = mol.GetConformer(confId)
1235 Chem.AssignStereochemistry(mol)
1236 center = mol.GetAtomWithIdx(centerIdx)
1237 if not center.HasProp('_CIPCode'):
1238 return 0.0
1239
1240 nbrs = center.GetNeighbors()
1241 nbrRanks = []
1242 for nbr in nbrs:
1243 rank = int(nbr.GetProp('_CIPRank'))
1244 pos = conf.GetAtomPosition(nbr.GetIdx())
1245 nbrRanks.append((rank, pos))
1246
1247
1248
1249 if len(nbrRanks) == 3:
1250 nbrRanks.append((-1, conf.GetAtomPosition(centerIdx)))
1251 nbrRanks.sort()
1252
1253 ps = [x[1] for x in nbrRanks]
1254 v1 = ps[0] - ps[3]
1255 v2 = ps[1] - ps[3]
1256 v3 = ps[2] - ps[3]
1257
1258 res = v1.DotProduct(v2.CrossProduct(v3))
1259 return res
1260
1261
1262
1263
1264
1265
1267 import doctest
1268 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS + doctest.NORMALIZE_WHITESPACE,
1269 verbose=verbose)
1270 sys.exit(failed)
1271
1272
1273 if __name__ == '__main__':
1274 _runDoctests()
1275