casacore
ConstrainedRangeStatistics.h
Go to the documentation of this file.
1 //# Copyright (C) 2000,2001
2 //# Associated Universities, Inc. Washington DC, USA.
3 //#
4 //# This library is free software; you can redistribute it and/or modify it
5 //# under the terms of the GNU Library General Public License as published by
6 //# the Free Software Foundation; either version 2 of the License, or (at your
7 //# option) any later version.
8 //#
9 //# This library is distributed in the hope that it will be useful, but WITHOUT
10 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
12 //# License for more details.
13 //#
14 //# You should have received a copy of the GNU Library General Public License
15 //# along with this library; if not, write to the Free Software Foundation,
16 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
17 //#
18 //# Correspondence concerning AIPS++ should be addressed as follows:
19 //# Internet email: aips2-request@nrao.edu.
20 //# Postal address: AIPS++ Project Office
21 //# National Radio Astronomy Observatory
22 //# 520 Edgemont Road
23 //# Charlottesville, VA 22903-2475 USA
24 //#
25 
26 #ifndef SCIMATH_CONSTRAINEDRANGESTATISTICS_H
27 #define SCIMATH_CONSTRAINEDRANGESTATISTICS_H
28 
29 #include <casacore/casa/aips.h>
30 
31 #include <casacore/scimath/StatsFramework/ClassicalStatistics.h>
32 #include <casacore/scimath/StatsFramework/ConstrainedRangeQuantileComputer.h>
33 
34 #include <set>
35 #include <vector>
36 #include <utility>
37 
38 namespace casacore {
39 
40 // Abstract base class for statistics algorithms which are characterized by
41 // a range of good values. The range is usually calculated dynamically based
42 // on the entire distribution. The specifics of such calculations are
43 // delegated to derived classes.
44 
45 template <
46  class AccumType, class DataIterator, class MaskIterator=const Bool*,
47  class WeightsIterator=DataIterator
48 >
49 class ConstrainedRangeStatistics : public ClassicalStatistics<CASA_STATP> {
50 public:
51 
52  ConstrainedRangeStatistics() = delete;
53 
55 
56  // <group>
57  // In the following group of methods, if the size of the composite dataset
58  // is smaller than
59  // <src>binningThreshholdSizeBytes</src>, the composite dataset
60  // will be (perhaps partially) sorted and persisted in memory during the
61  // call. In that case, and if <src>persistSortedArray</src> is True, this
62  // sorted array will remain in memory after the call and will be used on
63  // subsequent calls of this method when
64  // <src>binningThreshholdSizeBytes</src> is greater than the size of the
65  // composite dataset. If <src>persistSortedArray</src> is False, the sorted
66  // array will not be stored after this call completes and so, for any subsequent
67  // calls for which the dataset size is less than
68  // <src>binningThreshholdSizeBytes</src>, the dataset will be sorted from
69  // scratch. Values which are not included due to non-unity strides, are not
70  // included in any specified ranges, are masked, or have associated weights
71  // of zero are not considered as dataset members for quantile computations.
72  // If one has a priori information regarding the number of points (npts)
73  // and/or the minimum and maximum values of the data set, these can be
74  // supplied to improve performance. Note however, that if these values are
75  // not correct, the resulting median and/or quantile values will also not be
76  // correct (although see the following notes regarding max/min). Note that
77  // if this object has already had getStatistics() called, and the min and
78  // max were calculated, there is no need to pass these values in as they
79  // have been stored internally and used (although passing them in shouldn't
80  // hurt anything). If provided, npts, the number of points falling in the
81  // specified ranges which are not masked and have weights > 0, should be
82  // exactly correct. <src>min</src> can be less than the true minimum, and
83  // <src>max</src> can be greater than the true maximum, but for best
84  // performance, these should be as close to the actual min and max as
85  // possible. In order for quantile computations to occur over multiple
86  // datasets, all datasets must be available. This means that if
87  // setCalculateAsAdded() was previously called by passing in a value of
88  // True, these methods will throw an exception as the previous call
89  // indicates that there is no guarantee that all datasets will be available.
90  // If one uses a data provider (by having called setDataProvider()), then
91  // this should not be an issue.
92 
93  // get the median of the distribution.
94  // For a dataset with an odd number of good points, the median is just the
95  // value at index int(N/2) in the equivalent sorted dataset, where N is the
96  // number of points. For a dataset with an even number of points, the median
97  // is the mean of the values at indices int(N/2)-1 and int(N/2) in the
98  // sorted dataset.
99  virtual AccumType getMedian(
100  CountedPtr<uInt64> knownNpts=nullptr,
101  CountedPtr<AccumType> knownMin=nullptr,
102  CountedPtr<AccumType> knownMax=nullptr,
103  uInt binningThreshholdSizeBytes=4096*4096,
104  Bool persistSortedArray=False, uInt nBins=10000
105  );
106 
107  // get the median of the absolute deviation about the median of the data.
108  virtual AccumType getMedianAbsDevMed(
109  CountedPtr<uInt64> knownNpts=nullptr,
110  CountedPtr<AccumType> knownMin=nullptr,
111  CountedPtr<AccumType> knownMax=nullptr,
112  uInt binningThreshholdSizeBytes=4096*4096,
113  Bool persistSortedArray=False, uInt nBins=10000
114  );
115 
116  // If one needs to compute both the median and quantile values, it is better
117  // to call getMedianAndQuantiles() rather than getMedian() and
118  // getQuantiles() separately, as the first will scan large data sets fewer
119  // times than calling the separate methods. The return value is the median;
120  // the quantiles are returned in the <src>quantileToValue</src> map.
121  virtual AccumType getMedianAndQuantiles(
122  std::map<Double, AccumType>& quantileToValue,
123  const std::set<Double>& quantiles,
124  CountedPtr<uInt64> knownNpts=nullptr,
125  CountedPtr<AccumType> knownMin=nullptr,
126  CountedPtr<AccumType> knownMax=nullptr,
127  uInt binningThreshholdSizeBytes=4096*4096,
128  Bool persistSortedArray=False, uInt nBins=10000
129  );
130 
131  // Get the specified quantiles; each value in <src>quantiles</src> must be strictly
132  // between 0 and 1. Optional parameters are as described in the group comment above.
133  virtual std::map<Double, AccumType> getQuantiles(
134  const std::set<Double>& quantiles,
135  CountedPtr<uInt64> knownNpts=nullptr,
136  CountedPtr<AccumType> knownMin=nullptr,
137  CountedPtr<AccumType> knownMax=nullptr,
138  uInt binningThreshholdSizeBytes=4096*4096,
139  Bool persistSortedArray=False, uInt nBins=10000
140  );
141  // </group>
142 
143  // get the min and max of the data set
144  virtual void getMinMax(AccumType& mymin, AccumType& mymax);
145 
146  // scan the dataset(s) that have been added, and find the number of good
147  // points. This method may be called even if setStatsToCalculate() has been
148  // called and NPTS has been excluded. If setCalculateAsAdded(True) has
149  // previously been called after this object has been (re)initialized, an
150  // exception will be thrown.
151  virtual uInt64 getNPts();
152 
153  // see base class description
155 
156  // reset object to initial state. Clears all private fields including data,
157  // accumulators, global range.
158  virtual void reset();
159 
160 protected:
161 
162  // Concrete derived classes are responsible for providing an appropriate
163  // QuantileComputer object to the constructor, which is ultimately passed
164  // up the instantiation hierarchy and stored at the StatisticsAlgorithm
165  // level.
168  );
169 
170  // copy semantics
173  );
174 
175  // copy semantics
178  );
179 
180  // <group>
181  // scan through the data set to determine the number of good (unmasked,
182  // weight > 0, within range) points. The first with no mask, no ranges, and
183  // no weights is trivial with npts = nr in this class, but is implemented
184  // here so that derived classes may override it.
185  virtual void _accumNpts(
186  uInt64& npts, const DataIterator& dataStart, uInt64 nr, uInt dataStride
187  ) const;
188 
189  virtual void _accumNpts(
190  uInt64& npts,
191  const DataIterator& dataStart, uInt64 nr, uInt dataStride,
192  const DataRanges& ranges, Bool isInclude
193  ) const;
194 
195  virtual void _accumNpts(
196  uInt64& npts, const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
197  const MaskIterator& maskBegin, uInt maskStride
198  ) const;
199 
200  virtual void _accumNpts(
201  uInt64& npts,
202  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
203  const MaskIterator& maskBegin, uInt maskStride,
204  const DataRanges& ranges, Bool isInclude
205  ) const;
206 
207  virtual void _accumNpts(
208  uInt64& npts, const DataIterator& dataBegin,
209  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
210  ) const;
211 
212  virtual void _accumNpts(
213  uInt64& npts, const DataIterator& dataBegin,
214  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
215  const DataRanges& ranges, Bool isInclude
216  ) const;
217 
218  virtual void _accumNpts(
219  uInt64& npts, const DataIterator& dataBegin,
220  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
221  const MaskIterator& maskBegin, uInt maskStride,
222  const DataRanges& ranges, Bool isInclude
223  ) const;
224 
225  virtual void _accumNpts(
226  uInt64& npts, const DataIterator& dataBegin,
227  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
228  const MaskIterator& maskBegin, uInt maskStride
229  ) const;
230  // </group>
231 
232  virtual AccumType _getStatistic(StatisticsData::STATS stat);
233 
235 
236  // <group>
237  virtual void _minMax(
239  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
240  ) const;
241 
242  virtual void _minMax(
244  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
245  const DataRanges& ranges, Bool isInclude
246  ) const;
247 
248  virtual void _minMax(
250  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
251  const MaskIterator& maskBegin, uInt maskStride
252  ) const;
253 
254  virtual void _minMax(
256  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
257  const MaskIterator& maskBegin, uInt maskStride,
258  const DataRanges& ranges, Bool isInclude
259  ) const;
260 
261  virtual void _minMax(
263  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
264  uInt64 nr, uInt dataStride
265  ) const;
266 
267  virtual void _minMax(
269  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
270  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
271  ) const;
272 
273  virtual void _minMax(
275  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
276  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
277  uInt maskStride, const DataRanges& ranges, Bool isInclude
278  ) const;
279 
280  virtual void _minMax(
282  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
283  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
284  uInt maskStride
285  ) const;
286  // </group>
287 
288  // <group>
289  // Sometimes we want the min, max, and npts all in one scan.
290  virtual void _minMaxNpts(
291  uInt64& npts, CountedPtr<AccumType>& mymin,
292  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
293  uInt dataStride
294  ) const;
295 
296  virtual void _minMaxNpts(
297  uInt64& npts, CountedPtr<AccumType>& mymin,
298  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
299  uInt dataStride, const DataRanges& ranges, Bool isInclude
300  ) const;
301 
302  virtual void _minMaxNpts(
303  uInt64& npts, CountedPtr<AccumType>& mymin,
304  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
305  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride
306  ) const;
307 
308  virtual void _minMaxNpts(
309  uInt64& npts, CountedPtr<AccumType>& mymin,
310  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin, uInt64 nr,
311  uInt dataStride, const MaskIterator& maskBegin, uInt maskStride,
312  const DataRanges& ranges, Bool isInclude
313  ) const;
314 
315  virtual void _minMaxNpts(
316  uInt64& npts, CountedPtr<AccumType>& mymin,
317  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
318  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride
319  ) const;
320 
321  virtual void _minMaxNpts(
322  uInt64& npts, CountedPtr<AccumType>& mymin,
323  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
324  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
325  const DataRanges& ranges, Bool isInclude
326  ) const;
327 
328  virtual void _minMaxNpts(
329  uInt64& npts, CountedPtr<AccumType>& mymin,
330  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
331  const WeightsIterator& weightsBegin, uInt64 nr, uInt dataStride,
332  const MaskIterator& maskBegin, uInt maskStride,
333  const DataRanges& ranges, Bool isInclude
334  ) const;
335 
336  virtual void _minMaxNpts(
337  uInt64& npts, CountedPtr<AccumType>& mymin,
338  CountedPtr<AccumType>& mymax, const DataIterator& dataBegin,
339  const WeightsIterator& weightBegin, uInt64 nr, uInt dataStride,
340  const MaskIterator& maskBegin, uInt maskStride
341  ) const;
342  // </group>
343 
344  // This method is purposefully non-virtual. Derived classes
345  // should implement the version with no parameters.
346  void _setRange(CountedPtr<std::pair<AccumType, AccumType> > r);
347 
348  // derived classes need to implement how to set their respective range
349  virtual void _setRange() = 0;
350 
351  // <group>
352  // no weights, no mask, no ranges
353  virtual void _unweightedStats(
354  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
355  const DataIterator& dataBegin, uInt64 nr, uInt dataStride
356  );
357 
358  // no weights, no mask
359  virtual void _unweightedStats(
360  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
361  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
362  const DataRanges& ranges, Bool isInclude
363  );
364 
365  virtual void _unweightedStats(
366  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
367  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
368  const MaskIterator& maskBegin, uInt maskStride
369  );
370 
371  virtual void _unweightedStats(
372  StatsData<AccumType>& stats, uInt64& ngood, LocationType& location,
373  const DataIterator& dataBegin, uInt64 nr, uInt dataStride,
374  const MaskIterator& maskBegin, uInt maskStride,
375  const DataRanges& ranges, Bool isInclude
376  );
377  // </group>
378 
379  // <group>
380  // has weights, but no mask, no ranges
381  virtual void _weightedStats(
382  StatsData<AccumType>& stats, LocationType& location,
383  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
384  uInt64 nr, uInt dataStride
385  );
386 
387  virtual void _weightedStats(
388  StatsData<AccumType>& stats, LocationType& location,
389  const DataIterator& dataBegin, const WeightsIterator& weightsBegin,
390  uInt64 nr, uInt dataStride, const DataRanges& ranges, Bool isInclude
391  );
392 
393  virtual void _weightedStats(
394  StatsData<AccumType>& stats, LocationType& location,
395  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
396  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
397  uInt maskStride
398  );
399 
400  virtual void _weightedStats(
401  StatsData<AccumType>& stats, LocationType& location,
402  const DataIterator& dataBegin, const WeightsIterator& weightBegin,
403  uInt64 nr, uInt dataStride, const MaskIterator& maskBegin,
404  uInt maskStride, const DataRanges& ranges, Bool isInclude
405  );
406  // </group>
407 
408 private:
409 
411 
412 };
413 
414 }
415 
416 #ifndef CASACORE_NO_AUTO_TEMPLATES
417 #include <casacore/scimath/StatsFramework/ConstrainedRangeStatistics.tcc>
418 #endif
419 
420 #endif
virtual AccumType getMedian(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
In the following group of methods, if the size of the composite dataset is smaller than binningThresh...
virtual AccumType getMedianAbsDevMed(CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
get the median of the absolute deviation about the median of the data.
virtual void _setRange()=0
derived classes need to implement how to set their respective range
virtual std::map< Double, AccumType > getQuantiles(const std::set< Double > &quantiles, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=NULL, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
Get the specified quantiles.
unsigned long long uInt64
Definition: aipsxtype.h:39
CountedPtr< std::pair< AccumType, AccumType > > _range
virtual void _weightedStats(StatsData< AccumType > &stats, LocationType &location, const DataIterator &dataBegin, const WeightsIterator &weightsBegin, uInt64 nr, uInt dataStride)
has weights, but no mask, no ranges
Class to calculate statistics in a "classical" sense, ie using accumulators with no special filtering...
virtual AccumType getMedianAndQuantiles(std::map< Double, AccumType > &quantileToValue, const std::set< Double > &quantiles, CountedPtr< uInt64 > knownNpts=nullptr, CountedPtr< AccumType > knownMin=nullptr, CountedPtr< AccumType > knownMax=nullptr, uInt binningThreshholdSizeBytes=4096 *4096, Bool persistSortedArray=False, uInt nBins=10000)
If one needs to compute both the median and quantile values, it is better to call getMedianAndQuantil...
virtual uInt64 getNPts()
scan the dataset(s) that have been added, and find the number of good points.
Referenced counted pointer for constant data.
Definition: CountedPtr.h:80
virtual AccumType _getStatistic(StatisticsData::STATS stat)
ConstrainedRangeStatistics< CASA_STATP > & operator=(const ConstrainedRangeStatistics< CASA_STATP > &other)
copy semantics
std::pair< Int64, Int64 > LocationType
virtual LocationType getStatisticIndex(StatisticsData::STATS stat)
see base class description
#define DataRanges
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:42
virtual void reset()
reset object to initial state.
Abstract base class for statistics algorithms which are characterized by a range of good values...
const Bool False
Definition: aipstype.h:44
virtual void _minMax(CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
virtual void _minMaxNpts(uInt64 &npts, CountedPtr< AccumType > &mymin, CountedPtr< AccumType > &mymax, const DataIterator &dataBegin, uInt64 nr, uInt dataStride) const
Sometimes we want the min, max, and npts all in one scan.
virtual void _accumNpts(uInt64 &npts, const DataIterator &dataStart, uInt64 nr, uInt dataStride) const
scan through the data set to determine the number of good (unmasked, weight > 0, within range) points...
virtual void _unweightedStats(StatsData< AccumType > &stats, uInt64 &ngood, LocationType &location, const DataIterator &dataBegin, uInt64 nr, uInt dataStride)
no weights, no mask, no ranges
virtual StatsData< AccumType > _getStatistics()
this file contains all the compiler specific defines
Definition: mainpage.dox:28
virtual void getMinMax(AccumType &mymin, AccumType &mymax)
get the min and max of the data set
unsigned int uInt
Definition: aipstype.h:51