1
2
3
4
5
6
7
8
9
10
11 import sys
12
13 from rdkit import Chem
14 from rdkit.Chem.rdfragcatalog import *
15
16
19
20
26
27
def ProcessGainsFile(fileName, nToDo=-1, delim=',', haveDescriptions=1):
    """Read per-bit gain records from a delimited text file.

    The first line of the file is treated as a header and skipped, as are
    blank lines.  Every other line is split on ``delim`` and parsed as::

        id [, description], gain [, count, count, ...]

    Arguments:
      - fileName: path of the file to read.
      - nToDo: stop once this many records have been collected.  The
        default of -1 never matches the record count, so the whole file
        is read.
      - delim: field separator handed to ``str.split``.
      - haveDescriptions: if true, the second column is stored as the
        record's ``description`` and the gain is read from the third
        column; otherwise the gain is read from the second column.

    Returns:
      A list of ``BitGainsInfo`` objects with ``id`` (int), ``gain``
      (float) and ``nPerClass`` (list of int) filled in, plus
      ``description`` when ``haveDescriptions`` is true.

    Raises:
      ValueError or IndexError if a non-blank data line does not have the
      expected columns.
    """
    res = []
    # The context manager guarantees the handle is closed, including when a
    # malformed line raises part-way through the file.
    with open(fileName, 'r') as inFile:
        for lineNo, line in enumerate(inFile, 1):
            # Line 1 is the column header.  Blank lines must be filtered
            # before splitting: ''.split(',') yields [''], so a length check
            # on the split result would never skip them.
            if lineNo == 1 or not line.strip():
                continue
            splitL = [x.strip() for x in line.split(delim)]
            bit = BitGainsInfo()
            bit.id = int(splitL[0])
            col = 1
            if haveDescriptions:
                bit.description = splitL[col]
                col += 1
            bit.gain = float(splitL[col])
            col += 1
            # Every remaining column is a per-class count.
            bit.nPerClass = [int(entry) for entry in splitL[col:]]
            res.append(bit)
            if len(res) == nToDo:
                break
    return res
52
53
def BuildAdjacencyList(catalog, bits, limitInclusion=1, orderLevels=0):
    """Build the down-link adjacency list and per-order levels for some bits.

    Arguments:
      - catalog: object providing ``GetBitEntryId``, ``GetEntryBitId``,
        ``GetEntryOrder`` and ``GetEntryDownIds``.
      - bits: iterable of objects exposing an ``id`` attribute (the bit id).
      - limitInclusion: if true, a down-link is only recorded when its bit
        id is also one of the ids in ``bits``.
      - orderLevels: if true, each level is returned as a list sorted by
        decreasing number of recorded down-links instead of as a set.

    Returns:
      A 2-tuple ``(adjs, levels)``:
        - adjs: dict mapping each bit id in ``bits`` to the list of bit
          ids of its recorded down-links.
        - levels: dict mapping an entry order to the bit ids found at that
          order (a set, or a sorted list when ``orderLevels`` is true).
    """
    adjs = {}
    levels = {}
    bitIds = [bit.id for bit in bits]
    # Set copy of the ids so the inclusion test in the inner loop is O(1).
    knownIds = set(bitIds)
    for bitId in bitIds:
        entry = catalog.GetBitEntryId(bitId)
        levels.setdefault(catalog.GetEntryOrder(entry), set()).add(bitId)
        downIds = []
        for down in catalog.GetEntryDownIds(entry):
            downId = catalog.GetEntryBitId(down)
            if not limitInclusion or downId in knownIds:
                downIds.append(downId)
                levels.setdefault(catalog.GetEntryOrder(down), set()).add(downId)
        adjs[bitId] = downIds
    if orderLevels:
        # Sort the ids in each level by the number of down-links they have,
        # most-connected first.  Ids that were only reached as a down-link
        # (possible when limitInclusion is false) have no adjacency entry
        # and count as having zero down-links.
        levels = {
            order: sorted(ids, key=lambda bitId: len(adjs.get(bitId, ())),
                          reverse=True)
            for order, ids in levels.items()
        }
    return adjs, levels
85
86
def GetMolsMatchingBit(mols, bit, fps):
    """Return the entries of ``mols`` whose fingerprint has a given bit set.

    Arguments:
      - mols: iterable of items to filter.
      - bit: either a ``BitGainsInfo`` instance, in which case its ``id``
        is used, or the bit id itself.
      - fps: container indexed by position, parallel to ``mols``;
        ``fps[i]`` is indexed with the bit id and tested for truth.

    Returns:
      A list of the entries of ``mols``, in their original order, for
      which ``fps[i][bitId]`` is true.
    """
    bitId = bit.id if isinstance(bit, BitGainsInfo) else bit
    # fps is indexed by position rather than zipped with mols so that a
    # too-short fps still fails loudly instead of silently truncating.
    return [mol for i, mol in enumerate(mols) if fps[i][bitId]]
98