Package rdkit :: Package Chem :: Module QED
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.QED

  1  # 
  2  #  Copyright (c) 2009-2017, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  # 
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met: 
  8  # 
  9  #     * Redistributions of source code must retain the above copyright 
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following 
 13  #       disclaimer in the documentation and/or other materials provided 
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc. 
 16  #       nor the names of its contributors may be used to endorse or promote 
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  """ 
 32   
 33  QED stands for quantitative estimation of drug-likeness and the concept was for the first time 
 34  introduced by Richard Bickerton and coworkers [1]. The empirical rationale of the QED measure 
 35  reflects the underlying distribution of molecular properties including molecular weight, logP, 
 36  topological polar surface area, number of hydrogen bond donors and acceptors, the number of 
 37  aromatic rings and rotatable bonds, and the presence of unwanted chemical functionalities. 
 38   
 39  The QED results as generated by the RDKit-based implementation of Biscu-it(tm) are not completely 
 40  identical to those from the original publication [1]. These differences are a consequence of 
 41  differences within the underlying calculated property calculators used in both methods. For 
 42  example, discrepancies can be noted in the results from the logP calculations, nevertheless 
 43  despite the fact that both approaches (Pipeline Pilot in the original publication and RDKit 
 44  in our Biscu-it(tm) implementation) mention to use the Wildmann and Crippen methodology for the 
 45  calculation of their logP-values [2]. However, the differences in the resulting QED-values 
 46  are very small and are not compromising the usefulness of using Qed in your daily research. 
 47   
 48  [1] Bickerton, G.R.; Paolini, G.V.; Besnard, J.; Muresan, S.; Hopkins, A.L. (2012) 
 49      'Quantifying the chemical beauty of drugs', 
 50      Nature Chemistry, 4, 90-98 [http://dx.doi.org/10.1038/nchem.1243] 
 51   
 52  History: 
 53    2012-04 Adapted to internal RDkit implementation 
 54    2013-05 moved to rdkit.Chem.QED 
 55  """ 
 56  from collections import namedtuple 
 57  import math 
 58   
 59  from rdkit import Chem 
 60  from rdkit.Chem import MolSurf, Crippen 
 61  from rdkit.Chem import rdMolDescriptors as rdmd 
 62  from rdkit.Chem.ChemUtils.DescriptorUtilities import setDescriptorVersion 
 63   
 64   
 65  QEDproperties = namedtuple('QEDproperties', 'MW,ALOGP,HBA,HBD,PSA,ROTB,AROM,ALERTS') 
 66  ADSparameter = namedtuple('ADSparameter', 'A,B,C,D,E,F,DMAX') 
 67   
 68  WEIGHT_MAX = QEDproperties(0.50, 0.25, 0.00, 0.50, 0.00, 0.50, 0.25, 1.00) 
 69  WEIGHT_MEAN = QEDproperties(0.66, 0.46, 0.05, 0.61, 0.06, 0.65, 0.48, 0.95) 
 70  WEIGHT_NONE = QEDproperties(1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00) 
 71   
 72  AliphaticRings = Chem.MolFromSmarts('[$([A;R][!a])]') 
 73   
 74  # 
 75  AcceptorSmarts = [ 
 76    '[oH0;X2]', 
 77    '[OH1;X2;v2]', 
 78    '[OH0;X2;v2]', 
 79    '[OH0;X1;v2]', 
 80    '[O-;X1]', 
 81    '[SH0;X2;v2]', 
 82    '[SH0;X1;v2]', 
 83    '[S-;X1]', 
 84    '[nH0;X2]', 
 85    '[NH0;X1;v3]', 
 86    '[$([N;+0;X3;v3]);!$(N[C,S]=O)]' 
 87  ] 
 88  Acceptors = [Chem.MolFromSmarts(hba) for hba in AcceptorSmarts] 
 89   
 90  # 
 91  StructuralAlertSmarts = [ 
 92    '*1[O,S,N]*1', 
 93    '[S,C](=[O,S])[F,Br,Cl,I]', 
 94    '[CX4][Cl,Br,I]', 
 95    '[#6]S(=O)(=O)O[#6]', 
 96    '[$([CH]),$(CC)]#CC(=O)[#6]', 
 97    '[$([CH]),$(CC)]#CC(=O)O[#6]', 
 98    'n[OH]', 
 99    '[$([CH]),$(CC)]#CS(=O)(=O)[#6]', 
100    'C=C(C=O)C=O', 
101    'n1c([F,Cl,Br,I])cccc1', 
102    '[CH1](=O)', 
103    '[#8][#8]', 
104    '[C;!R]=[N;!R]', 
105    '[N!R]=[N!R]', 
106    '[#6](=O)[#6](=O)', 
107    '[#16][#16]', 
108    '[#7][NH2]', 
109    'C(=O)N[NH2]', 
110    '[#6]=S', 
111    '[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]=[$([CH2]),$([CH][CX4]),$(C([CX4])[CX4])]', 
112    'C1(=[O,N])C=CC(=[O,N])C=C1', 
113    'C1(=[O,N])C(=[O,N])C=CC=C1', 
114    'a21aa3a(aa1aaaa2)aaaa3', 
115    'a31a(a2a(aa1)aaaa2)aaaa3', 
116    'a1aa2a3a(a1)A=AA=A3=AA=A2', 
117    'c1cc([NH2])ccc1', 
118    '[Hg,Fe,As,Sb,Zn,Se,se,Te,B,Si,Na,Ca,Ge,Ag,Mg,K,Ba,Sr,Be,Ti,Mo,Mn,Ru,Pd,Ni,Cu,Au,Cd,' + 
119    'Al,Ga,Sn,Rh,Tl,Bi,Nb,Li,Pb,Hf,Ho]', 
120    'I', 
121    'OS(=O)(=O)[O-]', 
122    '[N+](=O)[O-]', 
123    'C(=O)N[OH]', 
124    'C1NC(=O)NC(=O)1', 
125    '[SH]', 
126    '[S-]', 
127    'c1ccc([Cl,Br,I,F])c([Cl,Br,I,F])c1[Cl,Br,I,F]', 
128    'c1cc([Cl,Br,I,F])cc([Cl,Br,I,F])c1[Cl,Br,I,F]', 
129    '[CR1]1[CR1][CR1][CR1][CR1][CR1][CR1]1', 
130    '[CR1]1[CR1][CR1]cc[CR1][CR1]1', 
131    '[CR2]1[CR2][CR2][CR2][CR2][CR2][CR2][CR2]1', 
132    '[CR2]1[CR2][CR2]cc[CR2][CR2][CR2]1', 
133    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', 
134    '[CH2R2]1N[CH2R2][CH2R2][CH2R2][CH2R2][CH2R2][CH2R2]1', 
135    'C#C', 
136    '[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]@[CR2]@[CR2]@[OR2,NR2]', 
137    '[$([N+R]),$([n+R]),$([N+]=C)][O-]', 
138    '[#6]=N[OH]', 
139    '[#6]=NOC=O', 
140    '[#6](=O)[CX4,CR0X3,O][#6](=O)', 
141    'c1ccc2c(c1)ccc(=O)o2', 
142    '[O+,o+,S+,s+]', 
143    'N=C=O', 
144    '[NX3,NX4][F,Cl,Br,I]', 
145    'c1ccccc1OC(=O)[#6]', 
146    '[CR0]=[CR0][CR0]=[CR0]', 
147    '[C+,c+,C-,c-]', 
148    'N=[N+]=[N-]', 
149    'C12C(NC(N1)=O)CSC2', 
150    'c1c([OH])c([OH,NH2,NH])ccc1', 
151    'P', 
152    '[N,O,S]C#N', 
153    'C=C=O', 
154    '[Si][F,Cl,Br,I]', 
155    '[SX2]O', 
156    '[SiR0,CR0](c1ccccc1)(c2ccccc2)(c3ccccc3)', 
157    'O1CCCCC1OC2CCC3CCCCC3C2', 
158    'N=[CR0][N,n,O,S]', 
159    '[cR2]1[cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2][cR2]1[cR2]2[cR2][cR2][cR2]([Nv3X3,Nv4X4])[cR2][cR2]2', 
160    'C=[C!r]C#N', 
161    '[cR2]1[cR2]c([N+0X3R0,nX3R0])c([N+0X3R0,nX3R0])[cR2][cR2]1', 
162    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2]c([N+0X3R0,nX3R0])[cR2]1', 
163    '[cR2]1[cR2]c([N+0X3R0,nX3R0])[cR2][cR2]c1([N+0X3R0,nX3R0])', 
164    '[OH]c1ccc([OH,NH2,NH])cc1', 
165    'c1ccccc1OC(=O)O', 
166    '[SX2H0][N]', 
167    'c12ccccc1(SC(S)=N2)', 
168    'c12ccccc1(SC(=S)N2)', 
169    'c1nnnn1C=O', 
170    's1c(S)nnc1NC=O', 
171    'S1C=CSC1=S', 
172    'C(=O)Onnn', 
173    'OS(=O)(=O)C(F)(F)F', 
174    'N#CC[OH]', 
175    'N#CC(=O)', 
176    'S(=O)(=O)C#N', 
177    'N[CH2]C#N', 
178    'C1(=O)NCC1', 
179    'S(=O)(=O)[O-,OH]', 
180    'NC[F,Cl,Br,I]', 
181    'C=[C!r]O', 
182    '[NX2+0]=[O+0]', 
183    '[OR0,NR0][OR0,NR0]', 
184    'C(=O)O[C,H1].C(=O)O[C,H1].C(=O)O[C,H1]', 
185    '[CX2R0][NX3R0]', 
186    'c1ccccc1[C;!R]=[C;!R]c2ccccc2', 
187    '[NX3R0,NX4R0,OR0,SX2R0][CX4][NX3R0,NX4R0,OR0,SX2R0]', 
188    '[s,S,c,C,n,N,o,O]~[n+,N+](~[s,S,c,C,n,N,o,O])(~[s,S,c,C,n,N,o,O])~[s,S,c,C,n,N,o,O]', 
189    '[s,S,c,C,n,N,o,O]~[nX3+,NX3+](~[s,S,c,C,n,N])~[s,S,c,C,n,N]', 
190    '[*]=[N+]=[*]', 
191    '[SX3](=O)[O-,OH]', 
192    'N#N', 
193    'F.F.F.F', 
194    '[R0;D2][R0;D2][R0;D2][R0;D2]', 
195    '[cR,CR]~C(=O)NC(=O)~[cR,CR]', 
196    'C=!@CC=[O,S]', 
197    '[#6,#8,#16][#6](=O)O[#6]', 
198    'c[C;R0](=[O,S])[#6]', 
199    'c[SX2][C;!R]', 
200    'C=C=C', 
201    'c1nc([F,Cl,Br,I,S])ncc1', 
202    'c1ncnc([F,Cl,Br,I,S])c1', 
203    'c1nc(c2c(n1)nc(n2)[F,Cl,Br,I])', 
204    '[#6]S(=O)(=O)c1ccc(cc1)F', 
205    '[15N]', 
206    '[13C]', 
207    '[18O]', 
208    '[34S]' 
209  ] 
210   
211  StructuralAlerts = [Chem.MolFromSmarts(smarts) for smarts in StructuralAlertSmarts] 
212   
213  adsParameters = { 
214    'MW': ADSparameter(A=2.817065973, B=392.5754953, C=290.7489764, D=2.419764353, E=49.22325677, 
215                       F=65.37051707, DMAX=104.9805561), 
216    'ALOGP': ADSparameter(A=3.172690585, B=137.8624751, C=2.534937431, D=4.581497897, E=0.822739154, 
217                          F=0.576295591, DMAX=131.3186604), 
218    'HBA': ADSparameter(A=2.948620388, B=160.4605972, C=3.615294657, D=4.435986202, E=0.290141953, 
219                        F=1.300669958, DMAX=148.7763046), 
220    'HBD': ADSparameter(A=1.618662227, B=1010.051101, C=0.985094388, D=0.000000001, E=0.713820843, 
221                        F=0.920922555, DMAX=258.1632616), 
222    'PSA': ADSparameter(A=1.876861559, B=125.2232657, C=62.90773554, D=87.83366614, E=12.01999824, 
223                        F=28.51324732, DMAX=104.5686167), 
224    'ROTB': ADSparameter(A=0.010000000, B=272.4121427, C=2.558379970, D=1.565547684, E=1.271567166, 
225                         F=2.758063707, DMAX=105.4420403), 
226    'AROM': ADSparameter(A=3.217788970, B=957.7374108, C=2.274627939, D=0.000000001, E=1.317690384, 
227                         F=0.375760881, DMAX=312.3372610), 
228    'ALERTS': ADSparameter(A=0.010000000, B=1199.094025, C=-0.09002883, D=0.000000001, E=0.185904477, 
229                           F=0.875193782, DMAX=417.7253140), 
230  } 
231 232 233 -def ads(x, adsParameter):
234 """ ADS function """ 235 p = adsParameter 236 exp1 = 1 + math.exp(-1 * (x - p.C + p.D / 2) / p.E) 237 exp2 = 1 + math.exp(-1 * (x - p.C - p.D / 2) / p.F) 238 dx = p.A + p.B / exp1 * (1 - 1 / exp2) 239 return dx / p.DMAX
240
241 242 -def properties(mol):
243 """ 244 Calculates the properties that are required to calculate the QED descriptor. 245 """ 246 if mol is None: 247 raise ValueError('You need to provide a mol argument.') 248 mol = Chem.RemoveHs(mol) 249 qedProperties = QEDproperties( 250 MW=rdmd._CalcMolWt(mol), 251 ALOGP=Crippen.MolLogP(mol), 252 HBA=sum(len(mol.GetSubstructMatches(pattern)) for pattern in Acceptors 253 if mol.HasSubstructMatch(pattern)), 254 HBD=rdmd.CalcNumHBD(mol), 255 PSA=MolSurf.TPSA(mol), 256 ROTB=rdmd.CalcNumRotatableBonds(mol, rdmd.NumRotatableBondsOptions.Strict), 257 AROM=Chem.GetSSSR(Chem.DeleteSubstructs(Chem.Mol(mol), AliphaticRings)), 258 ALERTS=sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert)), 259 ) 260 # The replacement 261 # AROM=Lipinski.NumAromaticRings(mol), 262 # is not identical. The expression above tends to count more rings 263 # N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4 264 # OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O 265 # CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1 uses 2, should be 0 ? 266 return qedProperties
267
268 269 @setDescriptorVersion(version='1.1.0') 270 -def qed(mol, w=WEIGHT_MEAN, qedProperties=None):
271 """ Calculate the weighted sum of ADS mapped properties 272 273 some examples from the QED paper, reference values from Peter G's original implementation 274 >>> m = Chem.MolFromSmiles('N=C(CCSCc1csc(N=C(N)N)n1)NS(N)(=O)=O') 275 >>> qed(m) 276 0.253... 277 >>> m = Chem.MolFromSmiles('CNC(=NCCSCc1nc[nH]c1C)NC#N') 278 >>> qed(m) 279 0.234... 280 >>> m = Chem.MolFromSmiles('CCCCCNC(=N)NN=Cc1c[nH]c2ccc(CO)cc12') 281 >>> qed(m) 282 0.234... 283 """ 284 if qedProperties is None: 285 qedProperties = properties(mol) 286 d = [ads(pi, adsParameters[name]) for name, pi in qedProperties._asdict().items()] 287 t = sum(wi * math.log(di) for wi, di in zip(w, d)) 288 return math.exp(t / sum(w))
289
290 291 -def weights_max(mol):
292 """ 293 Calculates the QED descriptor using maximal descriptor weights. 294 """ 295 return qed(mol, w=WEIGHT_MAX)
296
297 298 -def weights_mean(mol):
299 """ 300 Calculates the QED descriptor using average descriptor weights. 301 """ 302 return qed(mol, w=WEIGHT_MEAN)
303
304 305 -def weights_none(mol):
306 """ 307 Calculates the QED descriptor using unit weights. 308 """ 309 return qed(mol, w=WEIGHT_NONE)
310
311 312 -def default(mol):
313 """ 314 Calculates the QED descriptor using average descriptor weights. 315 """ 316 return weights_mean(mol)
317
318 319 # ------------------------------------ 320 # 321 # doctest boilerplate 322 # 323 -def _runDoctests(verbose=None): # pragma: nocover
324 import sys 325 import doctest 326 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) 327 sys.exit(failed) 328 329 330 if __name__ == '__main__': # pragma: nocover 331 _runDoctests() 332