Package rdkit :: Package ML :: Package Cluster :: Module Resemblance
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.Cluster.Resemblance

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2001-2006  greg Landrum 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ code for dealing with resemblance (metric) matrices 
 12   
 13      Here's how the matrices are stored: 
 14   
 15       '[(0,1),(0,2),(1,2),(0,3),(1,3),(2,3)...]  (row,col), col>row' 
 16   
 17       or, alternatively the matrix can be drawn, with indices as: 
 18   
 19         || - || 0 || 1 || 3 
 20         || - || - || 2 || 4 
 21         || - || - || - || 5 
 22         || - || - || - || - 
 23   
 24       the index of a given (row,col) pair is: 
 25         '(col*(col-1))/2 + row' 
 26   
 27  """ 
 28  from __future__ import print_function 
 29   
 30  import numpy 
 31   
 32   
def EuclideanDistance(inData):
    """Return the Euclidean metric matrix between the points in _inData_.

    **Arguments**

      - inData: an iterable of data points; each point is expected to be a
        flat sequence of numbers (a numpy array row, list, tuple, ...)

    **Returns**

      a one-dimensional numpy float array holding the pairwise distances,
      stored pair by pair as (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...
      The entry for (row, col) with col > row is at index
      col*(col-1)//2 + row.  Fewer than two points yield an empty array.

    """
    # Materialise the points once so that lists, arrays and one-shot
    # iterators are all handled the same way.
    points = [numpy.asarray(pt, dtype=float) for pt in inData]
    nObjs = len(points)
    # Integer division is required: a float length is rejected by
    # numpy.zeros on Python 3.  The builtin float replaces the removed
    # numpy.float alias.
    res = numpy.zeros(nObjs * (nObjs - 1) // 2, dtype=float)
    nSoFar = 0
    for col in range(1, nObjs):
        for row in range(col):
            t = points[row] - points[col]
            # squared distance; the square root is taken once at the end
            res[nSoFar] = numpy.dot(t, t)
            nSoFar += 1
    return numpy.sqrt(res)
56 57
def CalcMetricMatrix(inData, metricFunc):
    """Generate a metric matrix.

    **Arguments**

      - inData: an iterable of clusters (or anything with a GetPosition()
        method)

      - metricFunc: the function used to generate the matrix; it is called
        once with the list of positions

    **Returns**

      whatever metricFunc returns for the list of positions

    """
    # Build a real list rather than using map(): on Python 3 map() returns a
    # lazy iterator, which breaks metric functions that call len() or index
    # into their argument.
    positions = [obj.GetPosition() for obj in inData]
    return metricFunc(positions)
77 78
def FindMinValInList(mat, nObjs, minIdx=None):
    """Locate the minimum of a metric matrix and report where it sits.

    **Arguments**

      - mat: the metric matrix (flat, upper-triangle storage)

      - nObjs: the number of objects to be considered

      - minIdx: the index of the minimum value, if already known (the value,
        row and column still need to be calculated); when None it is found
        with numpy.argmin

    **Returns**

      a 3-tuple containing:

        1) the row
        2) the column
        3) the minimum value itself

    **Notes**

      - the column is found by a linear walk over the columns, so this is
        not the speediest thing on earth

    """
    expectedLen = nObjs * (nObjs - 1) / 2
    assert len(mat) == expectedLen, 'bad matrix length in FindMinValInList'
    minIdx = numpy.argmin(mat) if minIdx is None else minIdx

    # Walk the columns: column c holds c entries, so `consumed` is the number
    # of entries in columns 1..column once the loop stops.
    column = 0
    consumed = 0
    while consumed <= minIdx:
        column += 1
        consumed += column

    # Offset of minIdx from the first entry of its column.
    firstInColumn = consumed - column
    return minIdx - firstInColumn, column, mat[minIdx]
116 117
def ShowMetricMat(metricMat, nObjs):
    """Display a metric matrix on standard output.

    **Arguments**

      - metricMat: the matrix to be displayed (flat, upper-triangle storage
        where the entry for (row, col), col > row, is at index
        col*(col-1)//2 + row)

      - nObjs: the number of objects to display

    **Notes**

      - one line is printed per row; entries on or below the diagonal are
        shown as a '---' placeholder

    """
    assert len(metricMat) == nObjs * (nObjs - 1) // 2, 'bad matrix length in ShowMetricMat'
    for row in range(nObjs):
        for col in range(nObjs):
            if col <= row:
                # padded to the same 10-character width as the numeric cells
                # so that the columns line up
                print('   ---    ', end='')
            else:
                # Integer division: a float index raises TypeError on
                # Python 3.
                print('%10.6f' % metricMat[(col * (col - 1)) // 2 + row], end='')
        print()
# Registry of the metrics provided by this module.  Each entry looks like
# (short name, function, description) -- TODO confirm against consumers.
methods = [("Euclidean", EuclideanDistance, "Euclidean Distance"), ]

if __name__ == '__main__':
    # Small self-check: map every flat index of a 5-object matrix back to
    # its (row, col, value) triple and print it.
    nObjs = 5
    m = [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0]
    for i in range(10):
        print(i, FindMinValInList(m, nObjs, i))