QGIS API Documentation  3.26.3-Buenos Aires (65e4edfdad)
qgsstatisticalsummary.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsstatisticalsummary.cpp
3  --------------------------------------
4  Date : May 2015
5  Copyright : (C) 2015 by Nyall Dawson
6  Email : nyall dot dawson at gmail dot com
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 
16 #include "qgsstatisticalsummary.h"
17 #include <limits>
18 #include <QString>
19 #include <QObject>
20 
21 /***************************************************************************
22  * This class is considered CRITICAL and any change MUST be accompanied with
23  * full unit tests in testqgsstatisticalsummary.cpp.
24  * See details in QEP #17
25  ****************************************************************************/
26 
28  : mStatistics( stats )
29 {
30  reset();
31 }
32 
33 void QgsStatisticalSummary::setStatistics( QgsStatisticalSummary::Statistics stats )
34 {
35  mStatistics = stats;
36  reset();
37 }
38 
// Body of QgsStatisticalSummary::reset(): puts every accumulator and result
// member back into its initial state and recomputes the two storage flags
// from mStatistics.
// NOTE(review): the signature line (listing line 39, "void reset()" according
// to the cross-reference index) is absent from this extract.
40 {
// No value seen yet: first/last are NaN until addValue() assigns them.
41  mFirst = std::numeric_limits<double>::quiet_NaN();
42  mLast = std::numeric_limits<double>::quiet_NaN();
43  mCount = 0;
44  mMissing = 0;
45  mSum = 0;
46  mMean = 0;
47  mMedian = 0;
// Min starts at the largest finite double and max at the most negative one,
// so the first std::min / std::max in addValue() always replaces them.
48  mMin = std::numeric_limits<double>::max();
49  mMax = -std::numeric_limits<double>::max();
50  mStdev = 0;
51  mSampleStdev = 0;
52  mMinority = 0;
53  mMajority = 0;
54  mFirstQuartile = 0;
55  mThirdQuartile = 0;
// Drop the per-value occurrence counts and the stored raw values.
56  mValueCount.clear();
57  mValues.clear();
58 
// Occurrence counts per value are only maintained when Majority, Minority or
// Variety was requested.
59  mRequiresHisto = mStatistics & QgsStatisticalSummary::Majority || mStatistics & QgsStatisticalSummary::Minority || mStatistics & QgsStatisticalSummary::Variety;
60 
// Raw values are only kept when a requested statistic needs them.
// NOTE(review): this expression continues on listing lines 62-63, which are
// absent from this extract. finalize() indexes mValues for the median and the
// quartiles, so presumably those flags are OR-ed in there too - confirm
// against the upstream file.
61  mRequiresAllValueStorage = mStatistics & QgsStatisticalSummary::StDev || mStatistics & QgsStatisticalSummary::StDevSample ||
64 }
65 
66 /***************************************************************************
67  * This class is considered CRITICAL and any change MUST be accompanied with
68  * full unit tests in testqgsstatisticalsummary.cpp.
69  * See details in QEP #17
70  ****************************************************************************/
71 
72 void QgsStatisticalSummary::calculate( const QList<double> &values )
73 {
74  reset();
75 
76  for ( const double value : values )
77  {
78  addValue( value );
79  }
80 
81  finalize();
82 }
83 
84 void QgsStatisticalSummary::addValue( double value )
85 {
86  if ( mCount == 0 )
87  mFirst = value;
88  mCount++;
89  mSum += value;
90  mMin = std::min( mMin, value );
91  mMax = std::max( mMax, value );
92  mLast = value;
93 
94  if ( mRequiresHisto )
95  mValueCount.insert( value, mValueCount.value( value, 0 ) + 1 );
96 
97  if ( mRequiresAllValueStorage )
98  mValues << value;
99 }
100 
101 void QgsStatisticalSummary::addVariant( const QVariant &value )
102 {
103  bool convertOk = false;
104  if ( !value.isValid() || value.isNull() )
105  mMissing++;
106  else
107  {
108  const double val = value.toDouble( &convertOk );
109  if ( convertOk )
110  addValue( val );
111  else
112  mMissing++;
113  }
114 }
115 
// Body of QgsStatisticalSummary::finalize(): derives the final statistics
// (mean, standard deviations, median, quartiles, minority/majority) from the
// values accumulated by addValue().
// NOTE(review): the signature line (listing line 116, "void finalize()"
// according to the cross-reference index) is absent from this extract, as are
// listing lines 153, 168 and 199, which close three of the conditions below.
117 {
// Nothing was added: every value-based statistic is reported as NaN.
118  if ( mCount == 0 )
119  {
120  mFirst = std::numeric_limits<double>::quiet_NaN();
121  mLast = std::numeric_limits<double>::quiet_NaN();
122  mMin = std::numeric_limits<double>::quiet_NaN();
123  mMax = std::numeric_limits<double>::quiet_NaN();
124  mMean = std::numeric_limits<double>::quiet_NaN();
125  mMedian = std::numeric_limits<double>::quiet_NaN();
126  mStdev = std::numeric_limits<double>::quiet_NaN();
127  mSampleStdev = std::numeric_limits<double>::quiet_NaN();
128  mMinority = std::numeric_limits<double>::quiet_NaN();
129  mMajority = std::numeric_limits<double>::quiet_NaN();
130  mFirstQuartile = std::numeric_limits<double>::quiet_NaN();
131  mThirdQuartile = std::numeric_limits<double>::quiet_NaN();
132  return;
133  }
134 
135  mMean = mSum / mCount;
136 
// Standard deviations: accumulate the squared deviations from the mean over
// the stored values, then take the square root of the averaged sum.
137  if ( mStatistics & QgsStatisticalSummary::StDev || mStatistics & QgsStatisticalSummary::StDevSample )
138  {
139  double sumSquared = 0;
140  const auto constMValues = mValues;
141  for ( const double value : constMValues )
142  {
143  const double diff = value - mMean;
144  sumSquared += diff * diff;
145  }
// Population form divides by the number of values.
146  mStdev = std::pow( sumSquared / mValues.count(), 0.5 );
// Sample form divides by (number of values - 1); with a single stored value
// that divisor is 0.
147  mSampleStdev = std::pow( sumSquared / ( mValues.count() - 1 ), 0.5 );
148  }
149 
// Median: the stored values are sorted in place here; the quartile sections
// below index into mValues and so rely on this ordering.
// NOTE(review): the last line of this condition (listing line 153) is absent
// from this extract.
150  if ( mStatistics & QgsStatisticalSummary::Median
151  || mStatistics & QgsStatisticalSummary::FirstQuartile
152  || mStatistics & QgsStatisticalSummary::ThirdQuartile
154  {
155  std::sort( mValues.begin(), mValues.end() );
156  const bool even = ( mCount % 2 ) < 1;
157  if ( even )
158  {
// Even count: mean of the two middle values.
159  mMedian = ( mValues[mCount / 2 - 1] + mValues[mCount / 2] ) / 2.0;
160  }
161  else //odd
162  {
// Odd count: the single middle value.
163  mMedian = mValues[( mCount + 1 ) / 2 - 1];
164  }
165  }
166 
// First quartile: the median of the lower half of the sorted values.
// NOTE(review): the last line of this condition (listing line 168) is absent
// from this extract.
167  if ( mStatistics & QgsStatisticalSummary::FirstQuartile
169  {
170  if ( ( mCount % 2 ) < 1 )
171  {
// Even count: the lower half is the first mCount / 2 values.
172  const int halfCount = mCount / 2;
173  const bool even = ( halfCount % 2 ) < 1;
174  if ( even )
175  {
176  mFirstQuartile = ( mValues[halfCount / 2 - 1] + mValues[halfCount / 2] ) / 2.0;
177  }
178  else //odd
179  {
180  mFirstQuartile = mValues[( halfCount + 1 ) / 2 - 1];
181  }
182  }
183  else
184  {
// Odd count: the lower half has mCount / 2 + 1 values, i.e. it includes the
// middle value.
185  const int halfCount = mCount / 2 + 1;
186  const bool even = ( halfCount % 2 ) < 1;
187  if ( even )
188  {
189  mFirstQuartile = ( mValues[halfCount / 2 - 1] + mValues[halfCount / 2] ) / 2.0;
190  }
191  else //odd
192  {
193  mFirstQuartile = mValues[( halfCount + 1 ) / 2 - 1];
194  }
195  }
196  }
197 
// Third quartile: the median of the upper half of the sorted values, using
// the same half sizes as the first quartile with the indices shifted upwards.
// NOTE(review): the last line of this condition (listing line 199) is absent
// from this extract.
198  if ( mStatistics & QgsStatisticalSummary::ThirdQuartile
200  {
201  if ( ( mCount % 2 ) < 1 )
202  {
// Even count: the upper half starts at index halfCount.
203  const int halfCount = mCount / 2;
204  const bool even = ( halfCount % 2 ) < 1;
205  if ( even )
206  {
207  mThirdQuartile = ( mValues[ halfCount + halfCount / 2 - 1] + mValues[ halfCount + halfCount / 2] ) / 2.0;
208  }
209  else //odd
210  {
211  mThirdQuartile = mValues[( halfCount + 1 ) / 2 - 1 + halfCount ];
212  }
213  }
214  else
215  {
// Odd count: the upper half starts at index halfCount - 1 (the middle value),
// hence the extra -1 in the indices compared with the even case.
216  const int halfCount = mCount / 2 + 1;
217  const bool even = ( halfCount % 2 ) < 1;
218  if ( even )
219  {
220  mThirdQuartile = ( mValues[ halfCount + halfCount / 2 - 2 ] + mValues[ halfCount + halfCount / 2 - 1 ] ) / 2.0;
221  }
222  else //odd
223  {
224  mThirdQuartile = mValues[( halfCount + 1 ) / 2 - 2 + halfCount ];
225  }
226  }
227  }
228 
// Minority / majority: find the smallest / largest occurrence count, then look
// up a value that has that count. If several values share the count, which
// one is returned depends on the container's key() lookup.
229  if ( mStatistics & QgsStatisticalSummary::Minority || mStatistics & QgsStatisticalSummary::Majority )
230  {
231  QList<int> valueCounts = mValueCount.values();
232 
233  if ( mStatistics & QgsStatisticalSummary::Minority )
234  {
235  mMinority = mValueCount.key( *std::min_element( valueCounts.begin(), valueCounts.end() ) );
236  }
237  if ( mStatistics & QgsStatisticalSummary::Majority )
238  {
239  mMajority = mValueCount.key( *std::max_element( valueCounts.begin(), valueCounts.end() ) );
240  }
241  }
242 
243 }
244 
245 /***************************************************************************
246  * This class is considered CRITICAL and any change MUST be accompanied with
247  * full unit tests in testqgsstatisticalsummary.cpp.
248  * See details in QEP #17
249  ****************************************************************************/
250 
252 {
253  switch ( stat )
254  {
255  case Count:
256  return mCount;
257  case CountMissing:
258  return mMissing;
259  case Sum:
260  return mSum;
261  case Mean:
262  return mMean;
263  case Median:
264  return mMedian;
265  case StDev:
266  return mStdev;
267  case StDevSample:
268  return mSampleStdev;
269  case Min:
270  return mMin;
271  case Max:
272  return mMax;
273  case Range:
274  return mMax - mMin;
275  case Minority:
276  return mMinority;
277  case Majority:
278  return mMajority;
279  case Variety:
280  return mValueCount.count();
281  case FirstQuartile:
282  return mFirstQuartile;
283  case ThirdQuartile:
284  return mThirdQuartile;
285  case InterQuartileRange:
286  return mThirdQuartile - mFirstQuartile;
287  case First:
288  return mFirst;
289  case Last:
290  return mLast;
291  case All:
292  return 0;
293  }
294  return 0;
295 }
296 
298 {
299  switch ( statistic )
300  {
301  case Count:
302  return QObject::tr( "Count" );
303  case CountMissing:
304  return QObject::tr( "Count (missing)" );
305  case Sum:
306  return QObject::tr( "Sum" );
307  case Mean:
308  return QObject::tr( "Mean" );
309  case Median:
310  return QObject::tr( "Median" );
311  case StDev:
312  return QObject::tr( "St dev (pop)" );
313  case StDevSample:
314  return QObject::tr( "St dev (sample)" );
315  case Min:
316  return QObject::tr( "Minimum" );
317  case Max:
318  return QObject::tr( "Maximum" );
319  case Range:
320  return QObject::tr( "Range" );
321  case Minority:
322  return QObject::tr( "Minority" );
323  case Majority:
324  return QObject::tr( "Majority" );
325  case Variety:
326  return QObject::tr( "Variety" );
327  case FirstQuartile:
328  return QObject::tr( "Q1" );
329  case ThirdQuartile:
330  return QObject::tr( "Q3" );
331  case InterQuartileRange:
332  return QObject::tr( "IQR" );
333  case First:
334  return QObject::tr( "First" );
335  case Last:
336  return QObject::tr( "Last" );
337  case All:
338  return QString();
339  }
340  return QString();
341 }
342 
344 {
345  switch ( statistic )
346  {
347  case Count:
348  return QStringLiteral( "count" );
349  case CountMissing:
350  return QStringLiteral( "countmissing" );
351  case Sum:
352  return QStringLiteral( "sum" );
353  case Mean:
354  return QStringLiteral( "mean" );
355  case Median:
356  return QStringLiteral( "median" );
357  case StDev:
358  return QStringLiteral( "stdev" );
359  case StDevSample:
360  return QStringLiteral( "stdevsample" );
361  case Min:
362  return QStringLiteral( "min" );
363  case Max:
364  return QStringLiteral( "max" );
365  case Range:
366  return QStringLiteral( "range" );
367  case Minority:
368  return QStringLiteral( "minority" );
369  case Majority:
370  return QStringLiteral( "majority" );
371  case Variety:
372  return QStringLiteral( "variety" );
373  case FirstQuartile:
374  return QStringLiteral( "q1" );
375  case ThirdQuartile:
376  return QStringLiteral( "q3" );
377  case InterQuartileRange:
378  return QStringLiteral( "iqr" );
379  case First:
380  return QStringLiteral( "first" );
381  case Last:
382  return QStringLiteral( "last" );
383  case All:
384  return QString();
385  }
386  return QString();
387 }
388 
QgsStatisticalSummary::setStatistics
void setStatistics(QgsStatisticalSummary::Statistics stats)
Sets flags which specify which statistics will be calculated.
Definition: qgsstatisticalsummary.cpp:33
QgsStatisticalSummary::reset
void reset()
Resets the calculated values.
Definition: qgsstatisticalsummary.cpp:39
QgsStatisticalSummary::shortName
static QString shortName(QgsStatisticalSummary::Statistic statistic)
Returns a short, friendly display name for a statistic, suitable for use in a field name.
Definition: qgsstatisticalsummary.cpp:343
QgsStatisticalSummary::StDev
@ StDev
Standard deviation of values.
Definition: qgsstatisticalsummary.h:55
QgsStatisticalSummary::statistic
double statistic(QgsStatisticalSummary::Statistic stat) const
Returns the value of a specified statistic.
Definition: qgsstatisticalsummary.cpp:251
QgsStatisticalSummary::InterQuartileRange
@ InterQuartileRange
Inter quartile range (IQR)
Definition: qgsstatisticalsummary.h:65
QgsStatisticalSummary::ThirdQuartile
@ ThirdQuartile
Third quartile.
Definition: qgsstatisticalsummary.h:64
qgsstatisticalsummary.h
QgsStatisticalSummary::First
@ First
First value (since QGIS 3.6)
Definition: qgsstatisticalsummary.h:66
QgsStatisticalSummary::Sum
@ Sum
Sum of values.
Definition: qgsstatisticalsummary.h:52
QgsStatisticalSummary::Minority
@ Minority
Minority of values.
Definition: qgsstatisticalsummary.h:60
QgsStatisticalSummary::addValue
void addValue(double value)
Adds a single value to the statistics calculation.
Definition: qgsstatisticalsummary.cpp:84
QgsStatisticalSummary::StDevSample
@ StDevSample
Sample standard deviation of values.
Definition: qgsstatisticalsummary.h:56
QgsStatisticalSummary::finalize
void finalize()
Must be called after adding all values with addValues() and before retrieving any calculated statistics.
Definition: qgsstatisticalsummary.cpp:116
QgsStatisticalSummary::Range
@ Range
Range of values (max - min)
Definition: qgsstatisticalsummary.h:59
QgsStatisticalSummary::Majority
@ Majority
Majority of values.
Definition: qgsstatisticalsummary.h:61
QgsStatisticalSummary::QgsStatisticalSummary
QgsStatisticalSummary(QgsStatisticalSummary::Statistics stats=QgsStatisticalSummary::All)
Constructor for QgsStatisticalSummary.
Definition: qgsstatisticalsummary.cpp:27
QgsStatisticalSummary::CountMissing
@ CountMissing
Number of missing (null) values.
Definition: qgsstatisticalsummary.h:51
QgsStatisticalSummary::displayName
static QString displayName(QgsStatisticalSummary::Statistic statistic)
Returns the friendly display name for a statistic.
Definition: qgsstatisticalsummary.cpp:297
QgsStatisticalSummary::Statistic
Statistic
Enumeration of flags that specify statistics to be calculated.
Definition: qgsstatisticalsummary.h:48
QgsStatisticalSummary::Median
@ Median
Median of values.
Definition: qgsstatisticalsummary.h:54
QgsStatisticalSummary::All
@ All
Definition: qgsstatisticalsummary.h:68
QgsStatisticalSummary::Min
@ Min
Min of values.
Definition: qgsstatisticalsummary.h:57
QgsStatisticalSummary::FirstQuartile
@ FirstQuartile
First quartile.
Definition: qgsstatisticalsummary.h:63
QgsStatisticalSummary::Mean
@ Mean
Mean of values.
Definition: qgsstatisticalsummary.h:53
QgsStatisticalSummary::Max
@ Max
Max of values.
Definition: qgsstatisticalsummary.h:58
QgsStatisticalSummary::Count
@ Count
Count.
Definition: qgsstatisticalsummary.h:50
QgsStatisticalSummary::Last
@ Last
Last value (since QGIS 3.6)
Definition: qgsstatisticalsummary.h:67
QgsStatisticalSummary::addVariant
void addVariant(const QVariant &value)
Adds a single value to the statistics calculation.
Definition: qgsstatisticalsummary.cpp:101
QgsStatisticalSummary::Variety
@ Variety
Variety (count of distinct) values.
Definition: qgsstatisticalsummary.h:62
QgsStatisticalSummary::calculate
void calculate(const QList< double > &values)
Calculates summary statistics for a list of values.
Definition: qgsstatisticalsummary.cpp:72