Package rdkit :: Package ML :: Package InfoTheory :: Module BitClusterer
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.InfoTheory.BitClusterer

 1  # 
 2  #  Copyright (C) 2000-2008  Greg Landrum and Rational Discovery LLC 
 3  # 
 4   
 5  from rdkit import DataStructs 
 6  from rdkit.SimDivFilters import rdSimDivPickers as rdsimdiv 
 7   
 8  if rdsimdiv is None: 
 9    raise ImportError('rdSimDivPickers not built') 
10   
11   
class BitClusterer(object):
  """ Class to cluster a set of bits based on their correlation

  The correlation matrix is first built by reading the fingerprints
  from a database or a list of fingerprints. It is not built by this class:
  the caller passes it to ClusterBits().
  """

  def __init__(self, idList, nCluster, type=rdsimdiv.ClusterMethod.WARD):
    """ Store the clustering parameters; no clustering is done here

    ARGUMENTS:
      - idList : the ids of the bits to be clustered
      - nCluster : the number of clusters to generate
      - type : the clustering method passed to
               rdsimdiv.HierarchicalClusterPicker
    """
    # NOTE: `type` shadows the builtin, but it is part of the public signature
    # and so is kept for backwards compatibility.
    self._clusters = []
    self._bidList = idList
    self._nClusters = nCluster
    self._type = type

  def ClusterBits(self, corrMat):
    """ Cluster the bits and store the clusters as lists of bit ids

    ARGUMENTS:
      - corrMat : the correlation values for the bits in idList.
                  NOTE(review): presumably a numpy array in the layout that
                  HierarchicalClusterPicker.Cluster expects - confirm against
                  the callers.
    """
    # the clustering code actually needs distances, so take 1/val for each
    # element of corrMat
    # NOTE(review): a correlation of exactly zero divides by zero here;
    # confirm whether callers can ever pass one.
    distMat = 1 / corrMat

    pkr = rdsimdiv.HierarchicalClusterPicker(self._type)
    cls = pkr.Cluster(distMat, len(self._bidList), self._nClusters)

    # map the cluster member indices back to the actual bit ids
    self._clusters = [[self._bidList[i] for i in cl] for cl in cls]

  def SetClusters(self, clusters):
    """ Install an externally generated set of clusters

    ARGUMENTS:
      - clusters : a sequence of clusters (each a sequence of bit ids); its
                   length must equal the number of clusters given at
                   construction time

    RAISES:
      - AssertionError : if the number of clusters does not match
    """
    # An explicit check instead of a bare `assert`, so that the validation is
    # not stripped when python runs with -O. AssertionError is kept as the
    # exception type so that existing callers are unaffected.
    if len(clusters) != self._nClusters:
      raise AssertionError('expected %s clusters, got %s' %
                           (self._nClusters, len(clusters)))
    self._clusters = clusters

  def GetClusters(self):
    """ Return the current clusters (an empty list until they are generated or set) """
    return self._clusters

  def MapToClusterScores(self, fp):
    """ Map the fingerprint to a real valued vector of score based on the bit clusters

    The dimension of the vector is same as the number of clusters. Each value in the
    vector corresponds to the number of bits in the corresponding cluster
    that are turned on in the fingerprint

    ARGUMENTS:
      - fp : the fingerprint
    """
    # sized from the requested cluster count, so the result has that length
    # even if no clusters have been generated yet
    scores = [0] * self._nClusters
    for i, cls in enumerate(self._clusters):
      for bid in cls:
        if fp[bid]:
          scores[i] += 1
    return scores

  def MapToClusterFP(self, fp):
    """ Map the fingerprint to a smaller sized (= number of clusters) fingerprint

    Each cluster gets a bit in the new fingerprint and is turned on if any of the bits in
    the cluster are turned on in the original fingerprint

    ARGUMENTS:
      - fp : the fingerprint
    """
    ebv = DataStructs.ExplicitBitVect(self._nClusters)
    for i, cls in enumerate(self._clusters):
      # any() stops at the first bit that is on, like the original break
      if any(fp[bid] for bid in cls):
        ebv.SetBit(i)
    return ebv
75