RDKit
Open-source cheminformatics and machine learning.
CorrMatGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2006 Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_CORRMATGENERATOR_H_
11 #define _RD_CORRMATGENERATOR_H_
12 
13 #include <RDGeneral/types.h>
14 #include <DataStructs/BitVects.h>
15 #include <boost/dynamic_bitset.hpp>
16 
17 namespace RDInfoTheory {
18 // FIX: won't worry about it now, but this class can be templated by the type of
19 // container for the bit list and type of descriptors (fingerprint vs. real
20 // valued)
22  /*! \brief A class to generate a correlation matrix for a bunch of
23  *fingerprints
24  *
25  * The correlation matrix is computed only for the bit IDs that are set by a
26  *call to the
27  * function setBitIdList
28  *
29  * cr = BitCorrMatGenerator();
30  * cr.setBitIdList(bitIdList);
31  * for each fingerprint in list of fingerprints {
32  * cr.collectVotes(fingerprint);
33  * }
34  * double *corrMat = cr.getCorrMat();
35  *
36  * The resulting correlation matrix is a one-dimensional array holding only the
37  *strictly lower triangle elements (the diagonal is not stored)
38  * of the symmetric matrix
39  */
40  public:
42 
43  ~BitCorrMatGenerator() { delete[] dp_corrMat; }
44 
45  void initGenerator() {
46  dp_corrMat = 0;
47  d_descs.resize(0);
48  d_nExamples = 0;
49  };
50 
51  /*! \brief Set the list of bits that we are interested in correlating
52  *
53  * \param bitIdList is a list of bit ids that need to be correlated e.g. a
54  *list of the top ranked ensemble
55  * of bits
56  */
57  void setBitIdList(const RDKit::INT_VECT &bitIdList) {
58  d_descs = bitIdList;
59  int i, nd = d_descs.size();
60  int nelem = nd * (nd - 1) / 2;
61  delete[] dp_corrMat;
62 
63  dp_corrMat = new double[nd * (nd - 1) / 2];
64  for (i = 0; i < nelem; i++) {
65  dp_corrMat[i] = 0.0;
66  }
67  };
68 
69  //! \brief get the number of examples we used so far to compute the
70  //correlation matrix
71  int getNumExamples() const { return d_nExamples; };
72 
73  //! \brief Get the list of bits ID that are used to generate the correlation
74  //matrix
75  RDKit::INT_VECT getCorrBitList() const { return d_descs; };
76 
77  //! \brief Gets a pointer to the correlation matrix
78  double *getCorrMat() { return dp_corrMat; };
79 
80  //! \brief For each pair of on bits (bi, bj) in fp increase the correlation
81  //count
82  // for the pair by 1
83  void collectVotes(const BitVect &fp) {
84  unsigned int nd = d_descs.size();
85  // use a temporary bit vector to first mask the fingerprint
86  ExplicitBitVect ebv(nd);
87  int bi;
88  for (unsigned int i = 0; i < nd; i++) {
89  bi = d_descs[i];
90  if (fp[bi]) {
91  ebv.setBit(i);
92  }
93  }
94  for (unsigned i = 1; i < nd; i++) {
95  unsigned int itab = i * (i - 1) / 2;
96  if (ebv[i]) {
97  for (unsigned int j = 0; j < i; j++) {
98  if (ebv[j]) {
99  dp_corrMat[itab + j] += 1;
100  }
101  }
102  }
103  }
104  d_nExamples++;
105  };
106 
107  private:
108  RDKit::INT_VECT d_descs;
109  double *dp_corrMat;
110  int d_nExamples;
111 };
112 }
113 
114 #endif
Pulls in all the BitVect classes.
RDKit::INT_VECT getCorrBitList() const
Get the list of bit IDs that are used to generate the correlation matrix.
Class used to rank bits based on a specified measure of information.
void setBitIdList(const RDKit::INT_VECT &bitIdList)
Set the list bits that we are interested in correlating.
std::vector< int > INT_VECT
Definition: types.h:191
int getNumExamples() const
get the number of examples we used so far to compute the correlation matrix
bool setBit(const unsigned int which)
sets a particular bit and returns its original value
double * getCorrMat()
Gets a pointer to the correlation matrix.
a class for bit vectors that are densely occupied
Abstract base class for storing BitVectors.
Definition: BitVect.h:23
BitCorrMatGenerator()
A class to generate a correlation matrix for a bunch of fingerprints.
void collectVotes(const BitVect &fp)
For each pair of on bits (bi, bj) in fp increase the correlation count for the pair by 1.