Package rdkit :: Package Chem :: Package MolDb :: Module Loader_sa
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.MolDb.Loader_sa

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2007-2009 Greg Landrum 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  import sqlalchemy 
 11   
 12  from rdkit import Chem 
 13  from rdkit.Chem import AllChem 
 14  from rdkit.Chem import Lipinski, Descriptors, Crippen 
 15  from rdkit.Dbase.DbConnection import DbConnect 
 16  from rdkit.Dbase import DbModule 
 17  import os 
 18   
 19  from sqlalchemy.ext.declarative import declarative_base 
 20  from sqlalchemy import Table, Column, MetaData 
 21  from sqlalchemy import Integer, Text, String, ForeignKey, Binary, DateTime, Float 
 22  from sqlalchemy.orm import relation, mapper, sessionmaker, backref 
 23  from sqlalchemy import create_engine 
 24   
 25  decBase = declarative_base() 
 26   
 27   
class Compound(decBase):
    """Declarative ORM class mapped to the ``molecules`` table.

    Only the primary key and the pickled molecule are declared statically.
    LoadDb() in this module attaches further columns (name, SMILES, property
    and computed-descriptor columns) to this class with setattr() before the
    schema is created.
    """
    __tablename__ = 'molecules'
    # integer primary key of the row
    guid = Column(Integer, primary_key=True)
    # binary column; ProcessMol() stores the result of mol.ToBinary() here
    molpkl = Column(Binary)
32 33
def RegisterSchema(dbUrl, echo=False):
    """Create the declared tables in a database and return a session factory.

    Arguments:
      - dbUrl: database URL handed to sqlalchemy.create_engine()
      - echo: forwarded to create_engine() as its ``echo`` argument

    Returns:
      the sessionmaker bound to the new engine; call it to obtain a session.
    """
    dbEngine = create_engine(dbUrl, echo=echo)
    # Emit the schema for everything registered on decBase (e.g. Compound).
    decBase.metadata.create_all(dbEngine)
    return sessionmaker(bind=dbEngine)
# Public alias: ConnectToSchema is the very same function object as
# RegisterSchema, so "connecting" also creates any missing tables.
ConnectToSchema = RegisterSchema
def _ConnectToSchema(dbUrl, echo=False):
    """Private variant of RegisterSchema: build an engine, create tables, return a session factory.

    Arguments:
      - dbUrl: database URL handed to sqlalchemy.create_engine()
      - echo: forwarded to create_engine() as its ``echo`` argument

    Returns:
      a sessionmaker bound to the engine.
    """
    engine = create_engine(dbUrl, echo=echo)
    # A stray bare name (``meta``) used to sit here; it was never defined and
    # made every call fail with NameError, so it has been removed.
    decBase.metadata.create_all(engine)
    maker = sessionmaker(bind=engine)
    return maker
# Module-level logger: obtained from RDKit's RDLogger wrapper (bound to the
# name ``logging`` in this module) and set to the INFO level.
from rdkit import RDLogger as logging

logger = logging.logger()
logger.setLevel(logging.INFO)
def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False, skipProps=False, addComputedProps=False,
               skipSmiles=False):
    """Build a Compound row for one molecule and add it to the session.

    Arguments:
      - session: session object; the new Compound is passed to session.add()
      - mol: the molecule to store
      - globalProps: container of property names that have a column on
        Compound; only properties found in it are copied onto the row
      - nDone: running molecule count, used to build a fallback name
      - nameProp: molecule property that holds the compound name
      - nameCol: name of the column receiving the compound name
      - redraw: if true, 2D coordinates are recomputed for the molecule
      - keepHs: if true, Chem.SanitizeMol() is called on the molecule
      - skipProps: if true, neither computed nor molecule properties are stored
      - addComputedProps: if true (and skipProps is false), donor/acceptor/
        rotatable-bond counts, molecular weight and logP are stored
      - skipSmiles: if true, the ``smiles`` attribute is not set

    Returns:
      the populated Compound instance (already added to the session).

    Raises:
      ValueError: if ``mol`` is falsy.
    """
    if not mol:
        raise ValueError('no molecule')
    if keepHs:
        Chem.SanitizeMol(mol)

    # Fall back to a generated name when the name property is missing or empty.
    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # Previously referenced an undefined name ``m`` (NameError whenever
        # redraw was requested); the molecule in this scope is ``mol``.
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()

    # LoadDb() registers the name column under the lower-cased name, so write
    # to that same attribute; otherwise a mixed-case nameCol would land on an
    # unmapped attribute and the stored name would silently be the default.
    nameAttr = nameCol.lower()
    setattr(cmpd, nameAttr, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in list(mol.GetPropNames()):
            if pn.lower() == nameAttr:
                # the name column has already been filled in above
                continue
            if pn in globalProps:
                # only fetch values for properties that actually have a column
                setattr(cmpd, pn.lower(), mol.GetProp(pn).strip())
    return cmpd
98 99
def _CommitIndividually(session, cache):
    """Re-add and commit each cached compound on its own, dropping rows that fail."""
    for cmpd in cache:
        try:
            session.add(cmpd)
            session.commit()
        except Exception:
            session.rollback()
        except BaseException:
            # Rollback even with KeyboardInterrupt
            session.rollback()
            raise


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False, redraw=False,
           errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False, regName='molecules',
           skipSmiles=False, maxRowsCached=-1, uniqNames=False, addComputedProps=False,
           lazySupplier=False, numForPropScan=10, startAnew=True):
    """Load the molecules from a supplier into an SQLite database file.

    Columns are attached to the Compound class with setattr() before the
    schema is created, then every molecule is converted with ProcessMol() and
    committed in batches.

    Arguments:
      - suppl: iterable of molecules; must support len() unless lazySupplier
      - dbName: path of the SQLite file (used as 'sqlite:///<dbName>')
      - nameProp, nameCol, redraw, keepHs, skipProps, addComputedProps,
        skipSmiles: forwarded to ProcessMol()
      - silent: if true, progress messages are not logged
      - errorsTo: optional file-like object; the text of molecules that failed
        to parse is written to it when the supplier has GetItemText()
      - defaultVal: default value of the name and property columns
      - uniqNames: sets ``unique`` on the name column
      - lazySupplier: if true, len(suppl) is not called
      - numForPropScan: number of valid molecules scanned up front to discover
        property names that become columns
      - startAnew: if true, an existing file at dbName is deleted first
      - regName, maxRowsCached: accepted for signature compatibility; not used
        by this function

    Raises:
      IOError: if the old database file could not be deleted.
    """
    # Local imports: both are needed only on failure paths.
    import time
    import traceback

    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s" % dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules" % nMols)

    globalProps = {}
    if startAnew and os.path.exists(dbName):
        # The file may be briefly locked; retry a few times before giving up.
        for _ in range(5):
            try:
                os.unlink(dbName)
                break
            except OSError:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit during the back-off.
                time.sleep(2)
        if os.path.exists(dbName):
            raise IOError('could not delete old database %s' % dbName)

    # NOTE(review): if suppl is a one-shot iterator, the molecules consumed by
    # the property scan below will not be seen again by the main loop.
    sIter = iter(suppl)
    nameAttr = nameCol.lower()
    setattr(Compound, nameAttr, Column(nameAttr, String, default=defaultVal,
                                       unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text, unique=True)
    if not skipProps:
        # Scan the first numForPropScan valid molecules for property names and
        # create one String column per property (lower-cased name).
        while numForPropScan > 0:
            try:
                m = next(sIter)
            except StopIteration:
                numForPropScan = 0
                break
            if not m:
                continue
            for pn in m.GetPropNames():
                if pn.lower() == nameAttr:
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound, pn.lower(), Column(pn.lower(), String, default=defaultVal))
            numForPropScan -= 1
        if addComputedProps:
            Compound.DonorCount = Column(Integer)
            Compound.AcceptorCount = Column(Integer)
            Compound.RotatableBondCount = Column(Integer)
            Compound.AMW = Column(Float)
            Compound.MolLogP = Column(Float)
    session = RegisterSchema('sqlite:///%s' % (dbName))()

    nDone = 0
    cache = []
    for m in suppl:
        nDone += 1
        if not m:
            if errorsTo:
                if hasattr(suppl, 'GetItemText'):
                    d = suppl.GetItemText(nDone - 1)
                    errorsTo.write(d)
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session, m, globalProps, nDone, nameProp=nameProp, nameCol=nameCol,
                          redraw=redraw, keepHs=keepHs, skipProps=skipProps,
                          addComputedProps=addComputedProps, skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        # Commit in batches of 100 molecules.  The batch commit used to be
        # nested under ``not silent``, so silencing the log also disabled
        # batching; only the progress message depends on ``silent`` now.
        if not nDone % 100:
            if not silent:
                logger.info(' done %d' % nDone)
            try:
                session.commit()
            except Exception:
                # One bad row (e.g. a uniqueness violation) fails the whole
                # batch; roll back and salvage the rows one at a time.
                session.rollback()
                _CommitIndividually(session, cache)
            cache = []

    # Final commit for the last, partial batch.
    try:
        session.commit()
    except BaseException as exc:
        traceback.print_exc()
        session.rollback()
        _CommitIndividually(session, cache)
        if not isinstance(exc, Exception):
            # Re-raise on KeyboardInterrupt, SystemExit, etc.
            raise exc
if __name__ == '__main__':
    # Script mode: argv[1] is the input SD file, argv[2] the SQLite file to
    # create.  After loading, the number of stored compounds is printed.
    import sys

    supplier = Chem.SDMolSupplier(sys.argv[1])
    dbFile = sys.argv[2]
    LoadDb(supplier, dbFile, addComputedProps=False)

    makeSession = RegisterSchema('sqlite:///%s' % (dbFile))
    storedCompounds = makeSession().query(Compound).all()
    print('>>>>', len(storedCompounds))