Package rdkit :: Package Chem :: Module AllChem
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.AllChem

  1  # 
  2  #  Copyright (C) 2006-2017  greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ Import all RDKit chemistry modules 
 11   
 12  """ 
 13  import sys 
 14  import warnings 
 15  from collections import namedtuple 
 16   
 17  import numpy 
 18   
 19  from rdkit import DataStructs 
 20  from rdkit import ForceField 
 21  from rdkit import RDConfig 
 22  from rdkit import rdBase 
 23  from rdkit.Chem import * 
 24  from rdkit.Chem.ChemicalFeatures import * 
 25  from rdkit.Chem.rdChemReactions import * 
 26  from rdkit.Chem.rdDepictor import * 
 27  from rdkit.Chem.rdDistGeom import * 
 28  from rdkit.Chem.rdForceFieldHelpers import * 
 29  from rdkit.Chem.rdMolAlign import * 
 30  from rdkit.Chem.rdMolDescriptors import * 
 31  from rdkit.Chem.rdMolTransforms import * 
 32  from rdkit.Chem.rdPartialCharges import * 
 33  from rdkit.Chem.rdReducedGraphs import * 
 34  from rdkit.Chem.rdShapeHelpers import * 
 35  from rdkit.Chem.rdqueries import * 
 36  from rdkit.Geometry import rdGeometry 
 37  from rdkit.RDLogger import logger 
 38  from rdkit.Chem.EnumerateStereoisomers import StereoEnumerationOptions, EnumerateStereoisomers 
 39   
 40  try: 
 41    from rdkit.Chem.rdSLNParse import * 
 42  except ImportError: 
 43    pass 
 44   
 45  Mol.Compute2DCoords = Compute2DCoords 
 46  Mol.ComputeGasteigerCharges = ComputeGasteigerCharges 
 47  logger = logger() 
 48   
 49   
50 -def TransformMol(mol, tform, confId=-1, keepConfs=False):
51 """ Applies the transformation (usually a 4x4 double matrix) to a molecule 52 if keepConfs is False then all but that conformer are removed 53 """ 54 refConf = mol.GetConformer(confId) 55 TransformConformer(refConf, tform) 56 if not keepConfs: 57 if confId == -1: 58 confId = 0 59 allConfIds = [c.GetId() for c in mol.GetConformers()] 60 for cid in allConfIds: 61 if not cid == confId: 62 mol.RemoveConformer(cid) 63 # reset the conf Id to zero since there is only one conformer left 64 mol.GetConformer(confId).SetId(0)
65 66
67 -def ComputeMolShape(mol, confId=-1, boxDim=(20, 20, 20), spacing=0.5, **kwargs):
68 """ returns a grid representation of the molecule's shape 69 """ 70 res = rdGeometry.UniformGrid3D(boxDim[0], boxDim[1], boxDim[2], spacing=spacing) 71 EncodeShape(mol, res, confId, **kwargs) 72 return res
73 74
75 -def ComputeMolVolume(mol, confId=-1, gridSpacing=0.2, boxMargin=2.0):
76 """ Calculates the volume of a particular conformer of a molecule 77 based on a grid-encoding of the molecular shape. 78 79 """ 80 mol = rdchem.Mol(mol) 81 conf = mol.GetConformer(confId) 82 CanonicalizeConformer(conf) 83 box = ComputeConfBox(conf) 84 sideLen = (box[1].x - box[0].x + 2 * boxMargin, box[1].y - box[0].y + 2 * boxMargin, 85 box[1].z - box[0].z + 2 * boxMargin) 86 shape = rdGeometry.UniformGrid3D(sideLen[0], sideLen[1], sideLen[2], spacing=gridSpacing) 87 EncodeShape(mol, shape, confId, ignoreHs=False, vdwScale=1.0) 88 voxelVol = gridSpacing**3 89 occVect = shape.GetOccupancyVect() 90 voxels = [1 for x in occVect if x == 3] 91 vol = voxelVol * len(voxels) 92 return vol
93
94 -def GetConformerRMS(mol, confId1, confId2, atomIds=None, prealigned=False):
95 """ Returns the RMS between two conformations. 96 By default, the conformers will be aligned to the first conformer 97 before the RMS calculation and, as a side-effect, the second will be left 98 in the aligned state. 99 100 Arguments: 101 - mol: the molecule 102 - confId1: the id of the first conformer 103 - confId2: the id of the second conformer 104 - atomIds: (optional) list of atom ids to use a points for 105 alingment - defaults to all atoms 106 - prealigned: (optional) by default the conformers are assumed 107 be unaligned and the second conformer be aligned 108 to the first 109 110 """ 111 # align the conformers if necessary 112 # Note: the reference conformer is always the first one 113 if not prealigned: 114 if atomIds: 115 AlignMolConformers(mol, confIds=[confId1, confId2], atomIds=atomIds) 116 else: 117 AlignMolConformers(mol, confIds=[confId1, confId2]) 118 119 # calculate the RMS between the two conformations 120 conf1 = mol.GetConformer(id=confId1) 121 conf2 = mol.GetConformer(id=confId2) 122 ssr = 0 123 for i in range(mol.GetNumAtoms()): 124 d = conf1.GetAtomPosition(i).Distance(conf2.GetAtomPosition(i)) 125 ssr += d * d 126 ssr /= mol.GetNumAtoms() 127 return numpy.sqrt(ssr)
128 129
130 -def GetConformerRMSMatrix(mol, atomIds=None, prealigned=False):
131 """ Returns the RMS matrix of the conformers of a molecule. 132 As a side-effect, the conformers will be aligned to the first 133 conformer (i.e. the reference) and will left in the aligned state. 134 135 Arguments: 136 - mol: the molecule 137 - atomIds: (optional) list of atom ids to use a points for 138 alingment - defaults to all atoms 139 - prealigned: (optional) by default the conformers are assumed 140 be unaligned and will therefore be aligned to the 141 first conformer 142 143 Note that the returned RMS matrix is symmetrical, i.e. it is the 144 lower half of the matrix, e.g. for 5 conformers: 145 rmsmatrix = [ a, 146 b, c, 147 d, e, f, 148 g, h, i, j] 149 where a is the RMS between conformers 0 and 1, b is the RMS between 150 conformers 0 and 2, etc. 151 This way it can be directly used as distance matrix in e.g. Butina 152 clustering. 153 154 """ 155 # if necessary, align the conformers 156 # Note: the reference conformer is always the first one 157 rmsvals = [] 158 confIds = [conf.GetId() for conf in mol.GetConformers()] 159 if not prealigned: 160 if atomIds: 161 AlignMolConformers(mol, atomIds=atomIds, RMSlist=rmsvals) 162 else: 163 AlignMolConformers(mol, RMSlist=rmsvals) 164 else: # already prealigned 165 for i in range(1, len(confIds)): 166 rmsvals.append(GetConformerRMS(mol, confIds[0], confIds[i], atomIds=atomIds, prealigned=prealigned)) 167 # loop over the conformations (except the reference one) 168 cmat = [] 169 for i in range(1, len(confIds)): 170 cmat.append(rmsvals[i - 1]) 171 for j in range(1, i): 172 cmat.append(GetConformerRMS(mol, confIds[i], confIds[j], atomIds=atomIds, prealigned=True)) 173 return cmat
174 175
176 -def EnumerateLibraryFromReaction(reaction, sidechainSets, returnReactants=False):
177 """ Returns a generator for the virtual library defined by 178 a reaction and a sequence of sidechain sets 179 180 >>> from rdkit import Chem 181 >>> from rdkit.Chem import AllChem 182 >>> s1=[Chem.MolFromSmiles(x) for x in ('NC','NCC')] 183 >>> s2=[Chem.MolFromSmiles(x) for x in ('OC=O','OC(=O)C')] 184 >>> rxn = AllChem.ReactionFromSmarts('[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]') 185 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[s2,s1]) 186 >>> [Chem.MolToSmiles(x[0]) for x in list(r)] 187 ['CNC=O', 'CCNC=O', 'CNC(C)=O', 'CCNC(C)=O'] 188 189 Note that this is all done in a lazy manner, so "infinitely" large libraries can 190 be done without worrying about running out of memory. Your patience will run out first: 191 192 Define a set of 10000 amines: 193 >>> amines = (Chem.MolFromSmiles('N'+'C'*x) for x in range(10000)) 194 195 ... a set of 10000 acids 196 >>> acids = (Chem.MolFromSmiles('OC(=O)'+'C'*x) for x in range(10000)) 197 198 ... now the virtual library (1e8 compounds in principle): 199 >>> r = AllChem.EnumerateLibraryFromReaction(rxn,[acids,amines]) 200 201 ... look at the first 4 compounds: 202 >>> [Chem.MolToSmiles(next(r)[0]) for x in range(4)] 203 ['NC=O', 'CNC=O', 'CCNC=O', 'CCCNC=O'] 204 205 206 """ 207 if len(sidechainSets) != reaction.GetNumReactantTemplates(): 208 raise ValueError('%d sidechains provided, %d required' % 209 (len(sidechainSets), reaction.GetNumReactantTemplates())) 210 211 def _combiEnumerator(items, depth=0): 212 for item in items[depth]: 213 if depth + 1 < len(items): 214 v = _combiEnumerator(items, depth + 1) 215 for entry in v: 216 l = [item] 217 l.extend(entry) 218 yield l 219 else: 220 yield [item]
221 222 ProductReactants = namedtuple('ProductReactants', 'products,reactants') 223 for chains in _combiEnumerator(sidechainSets): 224 prodSets = reaction.RunReactants(chains) 225 for prods in prodSets: 226 if returnReactants: 227 yield ProductReactants(prods, chains) 228 else: 229 yield prods 230 231
232 -def ConstrainedEmbed(mol, core, useTethers=True, coreConfId=-1, randomseed=2342, 233 getForceField=UFFGetMoleculeForceField, **kwargs):
234 """ generates an embedding of a molecule where part of the molecule 235 is constrained to have particular coordinates 236 237 Arguments 238 - mol: the molecule to embed 239 - core: the molecule to use as a source of constraints 240 - useTethers: (optional) if True, the final conformation will be 241 optimized subject to a series of extra forces that pull the 242 matching atoms to the positions of the core atoms. Otherwise 243 simple distance constraints based on the core atoms will be 244 used in the optimization. 245 - coreConfId: (optional) id of the core conformation to use 246 - randomSeed: (optional) seed for the random number generator 247 248 249 An example, start by generating a template with a 3D structure: 250 >>> from rdkit.Chem import AllChem 251 >>> template = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1") 252 >>> AllChem.EmbedMolecule(template) 253 0 254 >>> AllChem.UFFOptimizeMolecule(template) 255 0 256 257 Here's a molecule: 258 >>> mol = AllChem.MolFromSmiles("c1nn(Cc2ccccc2)cc1-c3ccccc3") 259 260 Now do the constrained embedding 261 >>> newmol=AllChem.ConstrainedEmbed(mol, template) 262 263 Demonstrate that the positions are the same: 264 >>> newp=newmol.GetConformer().GetAtomPosition(0) 265 >>> molp=mol.GetConformer().GetAtomPosition(0) 266 >>> list(newp-molp)==[0.0,0.0,0.0] 267 True 268 >>> newp=newmol.GetConformer().GetAtomPosition(1) 269 >>> molp=mol.GetConformer().GetAtomPosition(1) 270 >>> list(newp-molp)==[0.0,0.0,0.0] 271 True 272 273 """ 274 match = mol.GetSubstructMatch(core) 275 if not match: 276 raise ValueError("molecule doesn't match the core") 277 coordMap = {} 278 coreConf = core.GetConformer(coreConfId) 279 for i, idxI in enumerate(match): 280 corePtI = coreConf.GetAtomPosition(i) 281 coordMap[idxI] = corePtI 282 283 ci = EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed, **kwargs) 284 if ci < 0: 285 raise ValueError('Could not embed molecule.') 286 287 algMap = [(j, i) for i, j in enumerate(match)] 288 289 if not useTethers: 290 # clean up the conformation 291 ff = getForceField(mol, confId=0) 292 for i, idxI in enumerate(match): 293 for j in range(i + 1, len(match)): 294 idxJ = match[j] 295 d = coordMap[idxI].Distance(coordMap[idxJ]) 296 ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.) 297 ff.Initialize() 298 n = 4 299 more = ff.Minimize() 300 while more and n: 301 more = ff.Minimize() 302 n -= 1 303 # rotate the embedded conformation onto the core: 304 rms = AlignMol(mol, core, atomMap=algMap) 305 else: 306 # rotate the embedded conformation onto the core: 307 rms = AlignMol(mol, core, atomMap=algMap) 308 ff = getForceField(mol, confId=0) 309 conf = core.GetConformer() 310 for i in range(core.GetNumAtoms()): 311 p = conf.GetAtomPosition(i) 312 pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1 313 ff.AddDistanceConstraint(pIdx, match[i], 0, 0, 100.) 314 ff.Initialize() 315 n = 4 316 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3) 317 while more and n: 318 more = ff.Minimize(energyTol=1e-4, forceTol=1e-3) 319 n -= 1 320 # realign 321 rms = AlignMol(mol, core, atomMap=algMap) 322 mol.SetProp('EmbedRMS', str(rms)) 323 return mol
324 325
326 -def AssignBondOrdersFromTemplate(refmol, mol):
327 """ assigns bond orders to a molecule based on the 328 bond orders in a template molecule 329 330 Arguments 331 - refmol: the template molecule 332 - mol: the molecule to assign bond orders to 333 334 An example, start by generating a template from a SMILES 335 and read in the PDB structure of the molecule 336 >>> import os 337 >>> from rdkit.Chem import AllChem 338 >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N") 339 >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb')) 340 >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 341 8 342 >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 343 22 344 345 Now assign the bond orders based on the template molecule 346 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 347 >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0]) 348 8 349 350 Note that the template molecule should have no explicit hydrogens 351 else the algorithm will fail. 352 353 It also works if there are different formal charges (this was github issue 235): 354 >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]') 355 >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol')) 356 >>> AllChem.MolToSmiles(mol) 357 'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O' 358 >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol) 359 >>> AllChem.MolToSmiles(newMol) 360 'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]' 361 362 """ 363 refmol2 = rdchem.Mol(refmol) 364 mol2 = rdchem.Mol(mol) 365 # do the molecules match already? 366 matching = mol2.GetSubstructMatch(refmol2) 367 if not matching: # no, they don't match 368 # check if bonds of mol are SINGLE 369 for b in mol2.GetBonds(): 370 if b.GetBondType() != BondType.SINGLE: 371 b.SetBondType(BondType.SINGLE) 372 b.SetIsAromatic(False) 373 # set the bonds of mol to SINGLE 374 for b in refmol2.GetBonds(): 375 b.SetBondType(BondType.SINGLE) 376 b.SetIsAromatic(False) 377 # set atom charges to zero; 378 for a in refmol2.GetAtoms(): 379 a.SetFormalCharge(0) 380 for a in mol2.GetAtoms(): 381 a.SetFormalCharge(0) 382 383 matching = mol2.GetSubstructMatches(refmol2, uniquify=False) 384 # do the molecules match now? 385 if matching: 386 if len(matching) > 1: 387 logger.warning("More than one matching pattern found - picking one") 388 matching = matching[0] 389 # apply matching: set bond properties 390 for b in refmol.GetBonds(): 391 atom1 = matching[b.GetBeginAtomIdx()] 392 atom2 = matching[b.GetEndAtomIdx()] 393 b2 = mol2.GetBondBetweenAtoms(atom1, atom2) 394 b2.SetBondType(b.GetBondType()) 395 b2.SetIsAromatic(b.GetIsAromatic()) 396 # apply matching: set atom properties 397 for a in refmol.GetAtoms(): 398 a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()]) 399 a2.SetHybridization(a.GetHybridization()) 400 a2.SetIsAromatic(a.GetIsAromatic()) 401 a2.SetNumExplicitHs(a.GetNumExplicitHs()) 402 a2.SetFormalCharge(a.GetFormalCharge()) 403 SanitizeMol(mol2) 404 if hasattr(mol2, '__sssAtoms'): 405 mol2.__sssAtoms = None # we don't want all bonds highlighted 406 else: 407 raise ValueError("No matching found") 408 return mol2
409 410 # ------------------------------------ 411 # 412 # doctest boilerplate 413 #
414 -def _runDoctests(verbose=None): # pragma: nocover
415 import sys 416 import doctest 417 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) 418 sys.exit(failed) 419 420 421 if __name__ == '__main__': # pragma: nocover 422 _runDoctests() 423