1
2
3
4
5
6
7
8
9
10 """ utility functionality for the 2D pharmacophores code
11
12 See Docs/Chem/Pharm2D.triangles.jpg for an illustration of the way
13 pharmacophores are broken into triangles and labelled.
14
15 See Docs/Chem/Pharm2D.signatures.jpg for an illustration of bit
16 numbering
17
18 """
19 from __future__ import print_function, division
20
21 import itertools
22
23
24
25
26
27 nPointDistDict = {
28 2: ((0, 1), ),
29 3: ((0, 1), (0, 2), (1, 2)),
30 4: ((0, 1), (0, 2), (0, 3), (1, 2), (2, 3)),
31 5: ((0, 1), (0, 2), (0, 3), (0, 4), (1, 2), (2, 3), (3, 4)),
32 6: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (1, 2), (2, 3), (3, 4), (4, 5)),
33 7: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)),
34 8: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (1, 2), (2, 3), (3, 4), (4, 5),
35 (5, 6), (6, 7)),
36 9: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (1, 2), (2, 3), (3, 4),
37 (4, 5), (5, 6), (6, 7), (7, 8)),
38 10: ((0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 2), (2, 3),
39 (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9)),
40 }
41
42
43
44
45 nDistPointDict = {
46 1: 2,
47 3: 3,
48 5: 4,
49 7: 5,
50 9: 6,
51 11: 7,
52 13: 8,
53 15: 9,
54 17: 10,
55 }
56
57 _trianglesInPharmacophore = {}
58
59
61 """ returns a tuple with the distance indices for
62 triangles composing an nPts-pharmacophore
63
64 """
65 global _trianglesInPharmacophore
66 if nPts < 3:
67 return []
68 res = _trianglesInPharmacophore.get(nPts, [])
69 if not res:
70 idx1, idx2, idx3 = (0, 1, nPts - 1)
71 while idx1 < nPts - 2:
72 res.append((idx1, idx2, idx3))
73 idx1 += 1
74 idx2 += 1
75 idx3 += 1
76 res = tuple(res)
77 _trianglesInPharmacophore[nPts] = res
78 return res
79
80
82 if x <= 1:
83 return 1
84
85 accum = 1
86 for i in range(x):
87 accum *= i + 1
88 return accum
89
90
92 """ checks the triangle inequality for combinations
93 of distance bins.
94
95 the general triangle inequality is:
96 d1 + d2 >= d3
97 the conservative binned form of this is:
98 d1(upper) + d2(upper) >= d3(lower)
99
100 """
101 if d1[1] + d2[1] < d3[0]:
102 return False
103 if d2[1] + d3[1] < d1[0]:
104 return False
105 if d3[1] + d1[1] < d2[0]:
106 return False
107
108 return True
109
110
112 """ checks the scaffold passed in to see if all
113 contributing triangles can satisfy the triangle inequality
114
115 the scaffold itself (encoded in combo) is a list of binned distances
116
117 """
118
119 nPts = nDistPointDict[len(combo)]
120 tris = GetTriangles(nPts)
121 for tri in tris:
122 ds = [bins[combo[x]] for x in tri]
123 if not BinsTriangleInequality(ds[0], ds[1], ds[2]):
124 return False
125 return True
126
127
128 _numCombDict = {}
129
130
132 """ returns the number of ways to fit nItems into nSlots
133
134 We assume that (x,y) and (y,x) are equivalent, and
135 (x,x) is allowed.
136
137 General formula is, for N items and S slots:
138 res = (N+S-1)! / ( (N-1)! * S! )
139
140 """
141 global _numCombDict
142 res = _numCombDict.get((nItems, nSlots), -1)
143 if res == -1:
144 res = _fact(nItems + nSlots - 1) // (_fact(nItems - 1) * _fact(nSlots))
145 _numCombDict[(nItems, nSlots)] = res
146 return res
147
148
149 _verbose = 0
150
151 _countCache = {}
152
153
154 -def CountUpTo(nItems, nSlots, vs, idx=0, startAt=0):
155 """ Figures out where a given combination of indices would
156 occur in the combinatorial explosion generated by _GetIndexCombinations_
157
158 **Arguments**
159
160 - nItems: the number of items to distribute
161
162 - nSlots: the number of slots in which to distribute them
163
164 - vs: a sequence containing the values to find
165
166 - idx: used in the recursion
167
168 - startAt: used in the recursion
169
170 **Returns**
171
172 an integer
173
174 """
175 global _countCache
176 if _verbose:
177 print(' ' * idx, 'CountUpTo(%d)' % idx, vs[idx], startAt)
178 if idx == 0 and (nItems, nSlots, tuple(vs)) in _countCache:
179 return _countCache[(nItems, nSlots, tuple(vs))]
180 elif idx >= nSlots:
181 accum = 0
182 elif idx == nSlots - 1:
183 accum = vs[idx] - startAt
184 else:
185 accum = 0
186
187 for i in range(startAt, vs[idx]):
188 nLevsUnder = nSlots - idx - 1
189 nValsOver = nItems - i
190 if _verbose:
191 print(' ' * idx, ' ', i, nValsOver, nLevsUnder, NumCombinations(nValsOver, nLevsUnder))
192 accum += NumCombinations(nValsOver, nLevsUnder)
193 accum += CountUpTo(nItems, nSlots, vs, idx + 1, vs[idx])
194 if _verbose:
195 print(' ' * idx, '>', accum)
196 if idx == 0:
197 _countCache[(nItems, nSlots, tuple(vs))] = accum
198 return accum
199
200
201 _indexCombinations = {}
202
203
205 """ Generates all combinations of nItems in nSlots without including
206 duplicates
207
208 **Arguments**
209
210 - nItems: the number of items to distribute
211
212 - nSlots: the number of slots in which to distribute them
213
214 - slot: used in recursion
215
216 - lastItemVal: used in recursion
217
218 **Returns**
219
220 a list of lists
221
222 """
223 global _indexCombinations
224 if not slot and (nItems, nSlots) in _indexCombinations:
225 res = _indexCombinations[(nItems, nSlots)]
226 elif slot >= nSlots:
227 res = []
228 elif slot == nSlots - 1:
229 res = [[x] for x in range(lastItemVal, nItems)]
230 else:
231 res = []
232 for x in range(lastItemVal, nItems):
233 tmp = GetIndexCombinations(nItems, nSlots, slot + 1, x)
234 for entry in tmp:
235 res.append([x] + entry)
236 if not slot:
237 _indexCombinations[(nItems, nSlots)] = res
238 return res
239
240
242 """ Does the combinatorial explosion of the possible combinations
243 of the elements of _choices_.
244
245 **Arguments**
246
247 - choices: sequence of sequences with the elements to be enumerated
248
249 - noDups: (optional) if this is nonzero, results with duplicates,
250 e.g. (1,1,0), will not be generated
251
252 - which: used in recursion
253
254 **Returns**
255
256 a list of lists
257
258 >>> GetAllCombinations([(0,),(1,),(2,)])
259 [[0, 1, 2]]
260 >>> GetAllCombinations([(0,),(1,3),(2,)])
261 [[0, 1, 2], [0, 3, 2]]
262
263 >>> GetAllCombinations([(0,1),(1,3),(2,)])
264 [[0, 1, 2], [0, 3, 2], [1, 3, 2]]
265
266 """
267 if which >= len(choices):
268 res = []
269 elif which == len(choices) - 1:
270 res = [[x] for x in choices[which]]
271 else:
272 res = []
273 tmp = GetAllCombinations(choices, noDups=noDups, which=which + 1)
274 for thing in choices[which]:
275 for other in tmp:
276 if not noDups or thing not in other:
277 res.append([thing] + other)
278 return res
279
280
282 """ Does the combinatorial explosion of the possible combinations
283 of the elements of _choices_.
284
285 """
286
287 assert len(choices) == len(classes)
288 if which >= len(choices):
289 res = []
290 elif which == len(choices) - 1:
291 res = [[(classes[which], x)] for x in choices[which]]
292 else:
293 res = []
294 tmp = GetUniqueCombinations(choices, classes, which=which + 1)
295 for thing in choices[which]:
296 for other in tmp:
297 idxThere = 0
298 for x in other:
299 if x[1] == thing:
300 idxThere += 1
301 if not idxThere:
302 newL = [(classes[which], thing)] + other
303 newL.sort()
304 if newL not in res:
305 res.append(newL)
306 return res
307
308
310 """ Does the combinatorial explosion of the possible combinations
311 of the elements of _choices_.
312
313 """
314
315 assert len(choices) == len(classes)
316 combos = set()
317 for choice in itertools.product(*choices):
318
319 if len(set(choice)) != len(choice):
320 continue
321 combos.add(tuple(sorted((cls, ch) for cls, ch in zip(classes, choice))))
322 return [list(combo) for combo in sorted(combos)]
323
324
326 """ uniquifies the combinations in the argument
327
328 **Arguments**:
329
330 - combos: a sequence of sequences
331
332 **Returns**
333
334 - a list of tuples containing the unique combos
335
336 """
337 resD = {}
338 for combo in combos:
339 k = combo[:]
340 k.sort()
341 resD[tuple(k)] = tuple(combo)
342 return list(resD.values())
343
344
346 """ gets all realizable scaffolds (passing the triangle inequality) with the
347 given number of points and returns them as a list of tuples
348
349 """
350 if nPts < 2:
351 res = 0
352 elif nPts == 2:
353 res = [(x, ) for x in range(len(bins))]
354 else:
355 nDists = len(nPointDistDict[nPts])
356 combos = GetAllCombinations([range(len(bins))] * nDists, noDups=0)
357 res = []
358 for combo in combos:
359 if not useTriangleInequality or ScaffoldPasses(combo, bins):
360 res.append(tuple(combo))
361 return res
362
363
365 """
366 put the distances for a triangle into canonical order
367
368 It's easy if the features are all different:
369 >>> OrderTriangle([0,2,4],[1,2,3])
370 ([0, 2, 4], [1, 2, 3])
371
372 It's trickiest if they are all the same:
373 >>> OrderTriangle([0,0,0],[1,2,3])
374 ([0, 0, 0], [3, 2, 1])
375 >>> OrderTriangle([0,0,0],[2,1,3])
376 ([0, 0, 0], [3, 2, 1])
377 >>> OrderTriangle([0,0,0],[1,3,2])
378 ([0, 0, 0], [3, 2, 1])
379 >>> OrderTriangle([0,0,0],[3,1,2])
380 ([0, 0, 0], [3, 2, 1])
381 >>> OrderTriangle([0,0,0],[3,2,1])
382 ([0, 0, 0], [3, 2, 1])
383
384 >>> OrderTriangle([0,0,1],[3,2,1])
385 ([0, 0, 1], [3, 2, 1])
386 >>> OrderTriangle([0,0,1],[1,3,2])
387 ([0, 0, 1], [1, 3, 2])
388 >>> OrderTriangle([0,0,1],[1,2,3])
389 ([0, 0, 1], [1, 3, 2])
390 >>> OrderTriangle([0,0,1],[1,3,2])
391 ([0, 0, 1], [1, 3, 2])
392
393 """
394 if len(featIndices) != 3:
395 raise ValueError('bad indices')
396 if len(dists) != 3:
397 raise ValueError('bad dists')
398
399 fs = set(featIndices)
400 if len(fs) == 3:
401 return featIndices, dists
402
403 dSums = [0] * 3
404 dSums[0] = dists[0] + dists[1]
405 dSums[1] = dists[0] + dists[2]
406 dSums[2] = dists[1] + dists[2]
407 mD = max(dSums)
408 if len(fs) == 1:
409 if dSums[0] == mD:
410 if dists[0] > dists[1]:
411 ireorder = (0, 1, 2)
412 dreorder = (0, 1, 2)
413 else:
414 ireorder = (0, 2, 1)
415 dreorder = (1, 0, 2)
416 elif dSums[1] == mD:
417 if dists[0] > dists[2]:
418 ireorder = (1, 0, 2)
419 dreorder = (0, 2, 1)
420 else:
421 ireorder = (1, 2, 0)
422 dreorder = (2, 0, 1)
423 else:
424 if dists[1] > dists[2]:
425 ireorder = (2, 0, 1)
426 dreorder = (1, 2, 0)
427 else:
428 ireorder = (2, 1, 0)
429 dreorder = (2, 1, 0)
430 else:
431
432 if featIndices[0] == featIndices[1]:
433 if dists[1] > dists[2]:
434 ireorder = (0, 1, 2)
435 dreorder = (0, 1, 2)
436 else:
437 ireorder = (1, 0, 2)
438 dreorder = (0, 2, 1)
439 elif featIndices[0] == featIndices[2]:
440 if dists[0] > dists[2]:
441 ireorder = (0, 1, 2)
442 dreorder = (0, 1, 2)
443 else:
444 ireorder = (2, 1, 0)
445 dreorder = (2, 1, 0)
446 else:
447 if dists[0] > dists[1]:
448 ireorder = (0, 1, 2)
449 dreorder = (0, 1, 2)
450 else:
451 ireorder = (0, 2, 1)
452 dreorder = (1, 0, 2)
453 dists = [dists[x] for x in dreorder]
454 featIndices = [featIndices[x] for x in ireorder]
455 return featIndices, dists
456
457
458
459
460
461
463 import sys
464 import doctest
465 failed, _ = doctest.testmod(optionflags=doctest.ELLIPSIS, verbose=verbose)
466 sys.exit(failed)
467
468
469 if __name__ == '__main__':
470 _runDoctests()
471