1
2
3
4
5
6
7
8
9
10 """ Import all RDKit chemistry modules
11
12 """
13 import sys
14 import warnings
15 from collections import namedtuple
16
17 import numpy
18
19 from rdkit import DataStructs
20 from rdkit import ForceField
21 from rdkit import RDConfig
22 from rdkit import rdBase
23 from rdkit.Chem import *
24 from rdkit.Chem.ChemicalFeatures import *
25 from rdkit.Chem.rdChemReactions import *
26 from rdkit.Chem.rdDepictor import *
27 from rdkit.Chem.rdDistGeom import *
28 from rdkit.Chem.rdForceFieldHelpers import *
29 from rdkit.Chem.rdMolAlign import *
30 from rdkit.Chem.rdMolDescriptors import *
31 from rdkit.Chem.rdMolTransforms import *
32 from rdkit.Chem.rdPartialCharges import *
33 from rdkit.Chem.rdReducedGraphs import *
34 from rdkit.Chem.rdShapeHelpers import *
35 from rdkit.Chem.rdqueries import *
36 from rdkit.Geometry import rdGeometry
37 from rdkit.RDLogger import logger
38 from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions, EnumerateStereoisomers
39
40 try:
41 from rdkit.Chem.rdSLNParse import *
42 except ImportError:
43 pass
44
45 Mol.Compute2DCoords = Compute2DCoords
46 Mol.ComputeGasteigerCharges = ComputeGasteigerCharges
47 logger = logger()
48
49
65
66
67 -def ComputeMolShape(mol, confId=-1, boxDim=(20, 20, 20), spacing=0.5, **kwargs):
68 """ returns a grid representation of the molecule's shape
69 """
70 res = rdGeometry.UniformGrid3D(boxDim[0], boxDim[1], boxDim[2], spacing=spacing)
71 EncodeShape(mol, res, confId, **kwargs)
72 return res
73
74
76 """ Calculates the volume of a particular conformer of a molecule
77 based on a grid-encoding of the molecular shape.
78
79 """
80 mol = rdchem.Mol(mol)
81 conf = mol.GetConformer(confId)
82 CanonicalizeConformer(conf)
83 box = ComputeConfBox(conf)
84 sideLen = (box[1].x - box[0].x + 2 * boxMargin, box[1].y - box[0].y + 2 * boxMargin,
85 box[1].z - box[0].z + 2 * boxMargin)
86 shape = rdGeometry.UniformGrid3D(sideLen[0], sideLen[1], sideLen[2], spacing=gridSpacing)
87 EncodeShape(mol, shape, confId, ignoreHs=False, vdwScale=1.0)
88 voxelVol = gridSpacing**3
89 occVect = shape.GetOccupancyVect()
90 voxels = [1 for x in occVect if x == 3]
91 vol = voxelVol * len(voxels)
92 return vol
93
128
129
174
175
177 """ Returns a generator for the virtual library defined by
178 a reaction and a sequence of sidechain sets
179
180 >>> from rdkit import Chem
181 >>> from rdkit.Chem import AllChem
182 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')]
183 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')]
184 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]')
185 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1])
186 >>> [Chem.MolToSmiles(x[0]) for x in list(r)]
187 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O']
188
189 Note that this is all done in a lazy manner, so "infinitely" large libraries can
190 be done without worrying about running out of memory. Your patience will run out first:
191
192 Define a set of 10000 amines:
193 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000))
194
195 ... a set of 10000 acids
196 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000))
197
198 ... now the virtual library (1e8 compounds in principle):
199 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines])
200
201 ... look at the first 4 compounds:
202 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)]
203 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O']
204
205
206 """
207 if len(sidechainSets) != reaction.GetNumReactantTemplates():
208 raise ValueError('%d sidechains provided, %d required' %
209 (len(sidechainSets), reaction.GetNumReactantTemplates()))
210
211 def _combiEnumerator(items, depth=0):
212 for item in items[depth]:
213 if depth + 1 < len(items):
214 v = _combiEnumerator(items, depth + 1)
215 for entry in v:
216 l = [item]
217 l.extend(entry)
218 yield l
219 else:
220 yield [item]
221
222 ProductReactants = namedtuple('ProductReactants', 'products,reactants')
223 for chains in _combiEnumerator(sidechainSets):
224 prodSets = reaction.RunReactants(chains)
225 for prods in prodSets:
226 if returnReactants:
227 yield ProductReactants(prods, chains)
228 else:
229 yield prods
230
231
232 -def ConstrainedEmbed(mol, core, useTethers=True, coreConfId=-1, randomseed=2342,
233 getForceField=UFFGetMoleculeForceField, **kwargs):
234 """ generates an embedding of a molecule where part of the molecule
235 is constrained to have particular coordinates
236
237 Arguments
238 - mol: the molecule to embed
239 - core: the molecule to use as a source of constraints
240 - useTethers: (optional) if True, the final conformation will be
241 optimized subject to a series of extra forces that pull the
242 matching atoms to the positions of the core atoms. Otherwise
243 simple distance constraints based on the core atoms will be
244 used in the optimization.
245 - coreConfId: (optional) id of the core conformation to use
246 - randomSeed: (optional) seed for the random number generator
247
248
249 An example, start by generating a template with a 3D structure:
250 >>> from rdkit.Chem import AllChem
251 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1")
252 >>> AllChem.EmbedMolecule(template)
253 0
254 >>> AllChem.UFFOptimizeMolecule(template)
255 0
256
257 Here's a molecule:
258 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3")
259
260 Now do the constrained embedding
261 >>> newmol=AllChem.ConstrainedEmbed(mol, template)
262
263 Demonstrate that the positions are the same:
264 >>> newp=newmol.GetConformer().GetAtomPosition(0)
265 >>> molp=mol.GetConformer().GetAtomPosition(0)
266 >>> list(newp-molp)==[0.0,0.0,0.0]
267 True
268 >>> newp=newmol.GetConformer().GetAtomPosition(1)
269 >>> molp=mol.GetConformer().GetAtomPosition(1)
270 >>> list(newp-molp)==[0.0,0.0,0.0]
271 True
272
273 """
274 match = mol.GetSubstructMatch(core)
275 if not match:
276 raise ValueError("molecule doesn't match the core")
277 coordMap = {}
278 coreConf = core.GetConformer(coreConfId)
279 for i, idxI in enumerate(match):
280 corePtI = coreConf.GetAtomPosition(i)
281 coordMap[idxI] = corePtI
282
283 ci = EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed, **kwargs)
284 if ci < 0:
285 raise ValueError('Could not embed molecule.')
286
287 algMap = [(j, i) for i, j in enumerate(match)]
288
289 if not useTethers:
290
291 ff = getForceField(mol, confId=0)
292 for i, idxI in enumerate(match):
293 for j in range(i + 1, len(match)):
294 idxJ = match[j]
295 d = coordMap[idxI].Distance(coordMap[idxJ])
296 ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.)
297 ff.Initialize()
298 n = 4
299 more = ff.Minimize()
300 while more and n:
301 more = ff.Minimize()
302 n -= 1
303
304 rms = AlignMol(mol, core, atomMap=algMap)
305 else:
306
307 rms = AlignMol(mol, core, atomMap=algMap)
308 ff = getForceField(mol, confId=0)
309 conf = core.GetConformer()
310 for i in range(core.GetNumAtoms()):
311 p = conf.GetAtomPosition(i)
312 pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
313 ff.AddDistanceConstraint(pIdx, match[i], 0, 0, 100.)
314 ff.Initialize()
315 n = 4
316 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
317 while more and n:
318 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
319 n -= 1
320
321 rms = AlignMol(mol, core, atomMap=algMap)
322 mol.SetProp('EmbedRMS', str(rms))
323 return mol
324
325
327 """ assigns bond orders to a molecule based on the
328 bond orders in a template molecule
329
330 Arguments
331 - refmol: the template molecule
332 - mol: the molecule to assign bond orders to
333
334 An example, start by generating a template from a SMILES
335 and read in the PDB structure of the molecule
336 >>> import os
337 >>> from rdkit.Chem import AllChem
338 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
339 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
340 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
341 8
342 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
343 22
344
345 Now assign the bond orders based on the template molecule
346 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
347 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
348 8
349
350 Note that the template molecule should have no explicit hydrogens
351 else the algorithm will fail.
352
353 It also works if there are different formal charges (this was github issue 235):
354 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
355 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
356 >>> AllChem.MolToSmiles(mol)
357 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
358 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
359 >>> AllChem.MolToSmiles(newMol)
360 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'
361
362 """
363 refmol2 = rdchem.Mol(refmol)
364 mol2 = rdchem.Mol(mol)
365
366 matching = mol2.GetSubstructMatch(refmol2)
367 if not matching:
368
369 for b in mol2.GetBonds():
370 if b.GetBondType() != BondType.SINGLE:
371 b.SetBondType(BondType.SINGLE)
372 b.SetIsAromatic(False)
373
374 for b in refmol2.GetBonds():
375 b.SetBondType(BondType.SINGLE)
376 b.SetIsAromatic(False)
377
378 for a in refmol2.GetAtoms():
379 a.SetFormalCharge(0)
380 for a in mol2.GetAtoms():
381 a.SetFormalCharge(0)
382
383 matching = mol2.GetSubstructMatches(refmol2, uniquify=False)
384
385 if matching:
386 if len(matching) > 1:
387 logger.warning("More than one matching pattern found - picking one")
388 matching = matching[0]
389
390 for b in refmol.GetBonds():
391 atom1 = matching[b.GetBeginAtomIdx()]
392 atom2 = matching[b.GetEndAtomIdx()]
393 b2 = mol2.GetBondBetweenAtoms(atom1, atom2)
394 b2.SetBondType(b.GetBondType())
395 b2.SetIsAromatic(b.GetIsAromatic())
396
397 for a in refmol.GetAtoms():
398 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()])
399 a2.SetHybridization(a.GetHybridization())
400 a2.SetIsAromatic(a.GetIsAromatic())
401 a2.SetNumExplicitHs(a.GetNumExplicitHs())
402 a2.SetFormalCharge(a.GetFormalCharge())
403 SanitizeMol(mol2)
404 if hasattr(mol2, '__sssAtoms'):
405 mol2.__sssAtoms = None
406 else:
407 raise ValueError("No matching found")
408 return mol2
409
410
411
412
413
415 import sys
416 import doctest
417 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
418 sys.exit(failed)
419
420
421 if __name__ == '__main__':
422 _runDoctests()
423