Package rdkit :: Package Chem :: Package Pharm2D :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Pharm2D.Utils

  1  # 
  2  # Copyright (C) 2002-2006 greg Landrum and Rational Discovery LLC 
  3  # 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  """ utility functionality for the 2D pharmacophores code 
 11   
 12    See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way 
 13    pharmacophores are broken into triangles and labelled. 
 14   
 15    See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit 
 16    numbering 
 17   
 18  """ 
 19  from __future__ import print_function, division 
 20   
 21  import itertools 
 22   
 23  # 
 24  #  number of points in a scaffold -> sequence of distances (p1,p2) in 
 25  #   the scaffold 
 26  # 
 27  nPointDistDict = { 
 28    2: ((0, 1), ), 
 29    3: ((0, 1), (0, 2), (1, 2)), 
 30    4: ((0, 1), (0, 2), (0, 3), (1, 2), (2, 3)), 
 31    5: ((0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4)), 
 32    6: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 2), (2, 3), (3, 4), (4, 5)), 
 33    7: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)), 
 34    8: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (1, 2), (2, 3), (3, 4), (4, 5), 
 35        (5, 6), (6, 7)), 
 36    9: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (1, 2), (2, 3), (3, 4), 
 37        (4, 5), (5, 6), (6, 7), (7, 8)), 
 38    10: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 2), (2, 3), 
 39         (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9)), 
 40  } 
 41   
 42  # 
 43  #  number of distances in a scaffold -> number of points in the scaffold 
 44  # 
 45  nDistPointDict = { 
 46    1: 2, 
 47    3: 3, 
 48    5: 4, 
 49    7: 5, 
 50    9: 6, 
 51    11: 7, 
 52    13: 8, 
 53    15: 9, 
 54    17: 10, 
 55  } 
 56   
 57  _trianglesInPharmacophore = {} 
 58   
 59   
60 -def GetTriangles(nPts):
61 """ returns a tuple with the distance indices for 62 triangles composing an nPts-pharmacophore 63 64 """ 65 global _trianglesInPharmacophore 66 if nPts < 3: 67 return [] 68 res = _trianglesInPharmacophore.get(nPts, []) 69 if not res: 70 idx1, idx2, idx3 = (0, 1, nPts - 1) 71 while idx1 < nPts - 2: 72 res.append((idx1, idx2, idx3)) 73 idx1 += 1 74 idx2 += 1 75 idx3 += 1 76 res = tuple(res) 77 _trianglesInPharmacophore[nPts] = res 78 return res
79 80
81 -def _fact(x):
82 if x <= 1: 83 return 1 84 85 accum = 1 86 for i in range(x): 87 accum *= i + 1 88 return accum
89 90
91 -def BinsTriangleInequality(d1, d2, d3):
92 """ checks the triangle inequality for combinations 93 of distance bins. 94 95 the general triangle inequality is: 96 d1 + d2 >= d3 97 the conservative binned form of this is: 98 d1(upper) + d2(upper) >= d3(lower) 99 100 """ 101 if d1[1] + d2[1] < d3[0]: 102 return False 103 if d2[1] + d3[1] < d1[0]: 104 return False 105 if d3[1] + d1[1] < d2[0]: 106 return False 107 108 return True
109 110
111 -def ScaffoldPasses(combo, bins=None):
112 """ checks the scaffold passed in to see if all 113 contributing triangles can satisfy the triangle inequality 114 115 the scaffold itself (encoded in combo) is a list of binned distances 116 117 """ 118 # this is the number of points in the pharmacophore 119 nPts = nDistPointDict[len(combo)] 120 tris = GetTriangles(nPts) 121 for tri in tris: 122 ds = [bins[combo[x]] for x in tri] 123 if not BinsTriangleInequality(ds[0], ds[1], ds[2]): 124 return False 125 return True
126 127 128 _numCombDict = {} 129 130
131 -def NumCombinations(nItems, nSlots):
132 """ returns the number of ways to fit nItems into nSlots 133 134 We assume that (x,y) and (y,x) are equivalent, and 135 (x,x) is allowed. 136 137 General formula is, for N items and S slots: 138 res = (N+S-1)! / ( (N-1)! * S! ) 139 140 """ 141 global _numCombDict 142 res = _numCombDict.get((nItems, nSlots), -1) 143 if res == -1: 144 res = _fact(nItems + nSlots - 1) // (_fact(nItems - 1) * _fact(nSlots)) 145 _numCombDict[(nItems, nSlots)] = res 146 return res
147 148 149 _verbose = 0 150 151 _countCache = {} 152 153
154 -def CountUpTo(nItems, nSlots, vs, idx=0, startAt=0):
155 """ Figures out where a given combination of indices would 156 occur in the combinatorial explosion generated by _GetIndexCombinations_ 157 158 **Arguments** 159 160 - nItems: the number of items to distribute 161 162 - nSlots: the number of slots in which to distribute them 163 164 - vs: a sequence containing the values to find 165 166 - idx: used in the recursion 167 168 - startAt: used in the recursion 169 170 **Returns** 171 172 an integer 173 174 """ 175 global _countCache 176 if _verbose: 177 print(' ' * idx, 'CountUpTo(%d)' % idx, vs[idx], startAt) 178 if idx == 0 and (nItems, nSlots, tuple(vs)) in _countCache: 179 return _countCache[(nItems, nSlots, tuple(vs))] 180 elif idx >= nSlots: 181 accum = 0 182 elif idx == nSlots - 1: 183 accum = vs[idx] - startAt 184 else: 185 accum = 0 186 # get the digit at idx correct 187 for i in range(startAt, vs[idx]): 188 nLevsUnder = nSlots - idx - 1 189 nValsOver = nItems - i 190 if _verbose: 191 print(' ' * idx, ' ', i, nValsOver, nLevsUnder, NumCombinations(nValsOver, nLevsUnder)) 192 accum += NumCombinations(nValsOver, nLevsUnder) 193 accum += CountUpTo(nItems, nSlots, vs, idx + 1, vs[idx]) 194 if _verbose: 195 print(' ' * idx, '>', accum) 196 if idx == 0: 197 _countCache[(nItems, nSlots, tuple(vs))] = accum 198 return accum
199 200 201 _indexCombinations = {} 202 203
204 -def GetIndexCombinations(nItems, nSlots, slot=0, lastItemVal=0):
205 """ Generates all combinations of nItems in nSlots without including 206 duplicates 207 208 **Arguments** 209 210 - nItems: the number of items to distribute 211 212 - nSlots: the number of slots in which to distribute them 213 214 - slot: used in recursion 215 216 - lastItemVal: used in recursion 217 218 **Returns** 219 220 a list of lists 221 222 """ 223 global _indexCombinations 224 if not slot and (nItems, nSlots) in _indexCombinations: 225 res = _indexCombinations[(nItems, nSlots)] 226 elif slot >= nSlots: 227 res = [] 228 elif slot == nSlots - 1: 229 res = [[x] for x in range(lastItemVal, nItems)] 230 else: 231 res = [] 232 for x in range(lastItemVal, nItems): 233 tmp = GetIndexCombinations(nItems, nSlots, slot + 1, x) 234 for entry in tmp: 235 res.append([x] + entry) 236 if not slot: 237 _indexCombinations[(nItems, nSlots)] = res 238 return res
239 240
241 -def GetAllCombinations(choices, noDups=1, which=0):
242 """ Does the combinatorial explosion of the possible combinations 243 of the elements of _choices_. 244 245 **Arguments** 246 247 - choices: sequence of sequences with the elements to be enumerated 248 249 - noDups: (optional) if this is nonzero, results with duplicates, 250 e.g. (1,1,0), will not be generated 251 252 - which: used in recursion 253 254 **Returns** 255 256 a list of lists 257 258 >>> GetAllCombinations([(0,),(1,),(2,)]) 259 [[0, 1, 2]] 260 >>> GetAllCombinations([(0,),(1,3),(2,)]) 261 [[0, 1, 2], [0, 3, 2]] 262 263 >>> GetAllCombinations([(0,1),(1,3),(2,)]) 264 [[0, 1, 2], [0, 3, 2], [1, 3, 2]] 265 266 """ 267 if which >= len(choices): 268 res = [] 269 elif which == len(choices) - 1: 270 res = [[x] for x in choices[which]] 271 else: 272 res = [] 273 tmp = GetAllCombinations(choices, noDups=noDups, which=which + 1) 274 for thing in choices[which]: 275 for other in tmp: 276 if not noDups or thing not in other: 277 res.append([thing] + other) 278 return res
279 280
281 -def GetUniqueCombinations(choices, classes, which=0):
282 """ Does the combinatorial explosion of the possible combinations 283 of the elements of _choices_. 284 285 """ 286 # print(choices, classes) 287 assert len(choices) == len(classes) 288 if which >= len(choices): 289 res = [] 290 elif which == len(choices) - 1: 291 res = [[(classes[which], x)] for x in choices[which]] 292 else: 293 res = [] 294 tmp = GetUniqueCombinations(choices, classes, which=which + 1) 295 for thing in choices[which]: 296 for other in tmp: 297 idxThere = 0 298 for x in other: 299 if x[1] == thing: 300 idxThere += 1 301 if not idxThere: 302 newL = [(classes[which], thing)] + other 303 newL.sort() 304 if newL not in res: 305 res.append(newL) 306 return res
307 308
309 -def GetUniqueCombinations_new(choices, classes, which=0):
310 """ Does the combinatorial explosion of the possible combinations 311 of the elements of _choices_. 312 313 """ 314 # print(choices, classes) 315 assert len(choices) == len(classes) 316 combos = set() 317 for choice in itertools.product(*choices): 318 # If a choice occurs in more than one of the fields, we ignore this case 319 if len(set(choice)) != len(choice): 320 continue 321 combos.add(tuple(sorted((cls, ch) for cls, ch in zip(classes, choice)))) 322 return [list(combo) for combo in sorted(combos)]
323 324
325 -def UniquifyCombinations(combos):
326 """ uniquifies the combinations in the argument 327 328 **Arguments**: 329 330 - combos: a sequence of sequences 331 332 **Returns** 333 334 - a list of tuples containing the unique combos 335 336 """ 337 resD = {} 338 for combo in combos: 339 k = combo[:] 340 k.sort() 341 resD[tuple(k)] = tuple(combo) 342 return list(resD.values())
343 344
345 -def GetPossibleScaffolds(nPts, bins, useTriangleInequality=True):
346 """ gets all realizable scaffolds (passing the triangle inequality) with the 347 given number of points and returns them as a list of tuples 348 349 """ 350 if nPts < 2: 351 res = 0 352 elif nPts == 2: 353 res = [(x, ) for x in range(len(bins))] 354 else: 355 nDists = len(nPointDistDict[nPts]) 356 combos = GetAllCombinations([range(len(bins))] * nDists, noDups=0) 357 res = [] 358 for combo in combos: 359 if not useTriangleInequality or ScaffoldPasses(combo, bins): 360 res.append(tuple(combo)) 361 return res
362 363
364 -def OrderTriangle(featIndices, dists):
365 """ 366 put the distances for a triangle into canonical order 367 368 It's easy if the features are all different: 369 >>> OrderTriangle([0,2,4],[1,2,3]) 370 ([0, 2, 4], [1, 2, 3]) 371 372 It's trickiest if they are all the same: 373 >>> OrderTriangle([0,0,0],[1,2,3]) 374 ([0, 0, 0], [3, 2, 1]) 375 >>> OrderTriangle([0,0,0],[2,1,3]) 376 ([0, 0, 0], [3, 2, 1]) 377 >>> OrderTriangle([0,0,0],[1,3,2]) 378 ([0, 0, 0], [3, 2, 1]) 379 >>> OrderTriangle([0,0,0],[3,1,2]) 380 ([0, 0, 0], [3, 2, 1]) 381 >>> OrderTriangle([0,0,0],[3,2,1]) 382 ([0, 0, 0], [3, 2, 1]) 383 384 >>> OrderTriangle([0,0,1],[3,2,1]) 385 ([0, 0, 1], [3, 2, 1]) 386 >>> OrderTriangle([0,0,1],[1,3,2]) 387 ([0, 0, 1], [1, 3, 2]) 388 >>> OrderTriangle([0,0,1],[1,2,3]) 389 ([0, 0, 1], [1, 3, 2]) 390 >>> OrderTriangle([0,0,1],[1,3,2]) 391 ([0, 0, 1], [1, 3, 2]) 392 393 """ 394 if len(featIndices) != 3: 395 raise ValueError('bad indices') 396 if len(dists) != 3: 397 raise ValueError('bad dists') 398 399 fs = set(featIndices) 400 if len(fs) == 3: 401 return featIndices, dists 402 403 dSums = [0] * 3 404 dSums[0] = dists[0] + dists[1] 405 dSums[1] = dists[0] + dists[2] 406 dSums[2] = dists[1] + dists[2] 407 mD = max(dSums) 408 if len(fs) == 1: 409 if dSums[0] == mD: 410 if dists[0] > dists[1]: 411 ireorder = (0, 1, 2) 412 dreorder = (0, 1, 2) 413 else: 414 ireorder = (0, 2, 1) 415 dreorder = (1, 0, 2) 416 elif dSums[1] == mD: 417 if dists[0] > dists[2]: 418 ireorder = (1, 0, 2) 419 dreorder = (0, 2, 1) 420 else: 421 ireorder = (1, 2, 0) 422 dreorder = (2, 0, 1) 423 else: 424 if dists[1] > dists[2]: 425 ireorder = (2, 0, 1) 426 dreorder = (1, 2, 0) 427 else: 428 ireorder = (2, 1, 0) 429 dreorder = (2, 1, 0) 430 else: 431 # two classes 432 if featIndices[0] == featIndices[1]: 433 if dists[1] > dists[2]: 434 ireorder = (0, 1, 2) 435 dreorder = (0, 1, 2) 436 else: 437 ireorder = (1, 0, 2) 438 dreorder = (0, 2, 1) 439 elif featIndices[0] == featIndices[2]: 440 if dists[0] > dists[2]: 441 ireorder = (0, 1, 2) 442 dreorder = (0, 1, 2) 443 else: 444 ireorder = (2, 1, 0) 445 dreorder = (2, 1, 0) 446 else: # featIndices[1]==featIndices[2]: 447 if dists[0] > dists[1]: 448 ireorder = (0, 1, 2) 449 dreorder = (0, 1, 2) 450 else: 451 ireorder = (0, 2, 1) 452 dreorder = (1, 0, 2) 453 dists = [dists[x] for x in dreorder] 454 featIndices = [featIndices[x] for x in ireorder] 455 return featIndices, dists
456 457 458 # ------------------------------------ 459 # 460 # doctest boilerplate 461 #
462 -def _runDoctests(verbose=None): # pragma: nocover
463 import sys 464 import doctest 465 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose) 466 sys.exit(failed) 467 468 469 if __name__ == '__main__': # pragma: nocover 470 _runDoctests() 471