27QString QgsBasicStatisticsAlgorithm::name()
const
29 return QStringLiteral(
"basicstatisticsforfields" );
32QString QgsBasicStatisticsAlgorithm::displayName()
const
34 return QObject::tr(
"Basic statistics for fields" );
37QStringList QgsBasicStatisticsAlgorithm::tags()
const
39 return QObject::tr(
"stats,statistics,date,time,datetime,string,number,text,table,layer,sum,maximum,minimum,mean,average,standard,deviation,count,distinct,unique,variance,median,quartile,range,majority,minority,summary" ).split(
',' );
42QString QgsBasicStatisticsAlgorithm::group()
const
44 return QObject::tr(
"Vector analysis" );
47QString QgsBasicStatisticsAlgorithm::groupId()
const
49 return QStringLiteral(
"vectoranalysis" );
52QString QgsBasicStatisticsAlgorithm::shortHelpString()
const
54 return QObject::tr(
"Generates basic statistics from the analysis of a values in a field in the attribute table of a vector layer. Numeric, date, time and string fields are supported. The statistics returned will depend on the field type." );
57QgsBasicStatisticsAlgorithm *QgsBasicStatisticsAlgorithm::createInstance()
const
59 return new QgsBasicStatisticsAlgorithm();
62void QgsBasicStatisticsAlgorithm::initAlgorithm(
const QVariantMap & )
65 addParameter(
new QgsProcessingParameterField( QStringLiteral(
"FIELD_NAME" ), QObject::tr(
"Field to calculate statistics on" ), QVariant(), QStringLiteral(
"INPUT_LAYER" ) ) );
67 addParameter(
new QgsProcessingParameterFileDestination( QStringLiteral(
"OUTPUT_HTML_FILE" ), QObject::tr(
"Statistics report" ), QObject::tr(
"'HTML files (*.html)" ), QVariant(),
true ) );
84 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"MINORITY" ), QObject::tr(
"Minority (rarest occurring value)" ) ) );
85 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"MAJORITY" ), QObject::tr(
"Majority (most frequently occurring value)" ) ) );
93 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT_LAYER" ), context ) );
97 const QString fieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
98 const int fieldIndex = source->fields().lookupField( fieldName );
101 throw QgsProcessingException( QObject::tr(
"Invalid field for statistics: “%1” does not exist" ).arg( fieldName ) );
104 QgsField field = source->fields().at( fieldIndex );
106 QString outputHtml = parameterAsFileOutput( parameters, QStringLiteral(
"OUTPUT_HTML_FILE" ), context );
111 const long long count = source->featureCount();
114 fields.
append(
QgsField( QStringLiteral(
"count" ), QMetaType::Int ) );
115 fields.
append(
QgsField( QStringLiteral(
"unique" ), QMetaType::Int ) );
116 fields.
append(
QgsField( QStringLiteral(
"empty" ), QMetaType::Int ) );
117 fields.
append(
QgsField( QStringLiteral(
"filled" ), QMetaType::Int ) );
121 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::Double ) );
122 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::Double ) );
123 fields.
append(
QgsField( QStringLiteral(
"range" ), QMetaType::Double ) );
124 fields.
append(
QgsField( QStringLiteral(
"sum" ), QMetaType::Double ) );
125 fields.
append(
QgsField( QStringLiteral(
"mean" ), QMetaType::Double ) );
126 fields.
append(
QgsField( QStringLiteral(
"median" ), QMetaType::Double ) );
127 fields.
append(
QgsField( QStringLiteral(
"stddev" ), QMetaType::Double ) );
128 fields.
append(
QgsField( QStringLiteral(
"cv" ), QMetaType::Double ) );
129 fields.
append(
QgsField( QStringLiteral(
"minority" ), QMetaType::Double ) );
130 fields.
append(
QgsField( QStringLiteral(
"majority" ), QMetaType::Double ) );
131 fields.
append(
QgsField( QStringLiteral(
"q1" ), QMetaType::Double ) );
132 fields.
append(
QgsField( QStringLiteral(
"q3" ), QMetaType::Double ) );
133 fields.
append(
QgsField( QStringLiteral(
"iqr" ), QMetaType::Double ) );
137 if ( field.
type() == QMetaType::Type::QDate )
139 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QDate ) );
140 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QDate ) );
142 else if ( field.
type() == QMetaType::Type::QTime )
144 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QTime ) );
145 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QTime ) );
149 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QDateTime ) );
150 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QDateTime ) );
152 fields.
append(
QgsField( QStringLiteral(
"range" ), QMetaType::Double ) );
156 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QString ) );
157 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QString ) );
158 fields.
append(
QgsField( QStringLiteral(
"min_length" ), QMetaType::Double ) );
159 fields.
append(
QgsField( QStringLiteral(
"max_length" ), QMetaType::Double ) );
160 fields.
append(
QgsField( QStringLiteral(
"mean_length" ), QMetaType::Double ) );
161 fields.
append(
QgsField( QStringLiteral(
"minority" ), QMetaType::QString ) );
162 fields.
append(
QgsField( QStringLiteral(
"majority" ), QMetaType::QString ) );
167 if ( parameters.value( QStringLiteral(
"OUTPUT" ) ).isValid() && !sink )
171 data << QObject::tr(
"Analyzed field: %1" ).arg( fieldName );
177 outputs = calculateNumericStatistics( fieldIndex, features, count, sink.get(), data, feedback );
181 outputs = calculateDateTimeStatistics( fieldIndex, field, features, count, sink.get(), data, feedback );
185 outputs = calculateStringStatistics( fieldIndex, features, count, sink.get(), data, feedback );
188 if ( !outputHtml.isEmpty() )
190 QFile file( outputHtml );
191 if ( file.open( QIODevice::WriteOnly | QIODevice::Truncate ) )
193 QTextStream out( &file );
194#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
195 out.setCodec(
"UTF-8" );
197 out << QStringLiteral(
"<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"/></head><body>\n" );
198 for (
const QString &s : data )
200 out << QStringLiteral(
"<p>%1</p>" ).arg( s );
202 out << QStringLiteral(
"</body></html>" );
204 outputs.insert( QStringLiteral(
"OUTPUT_HTML_FILE" ), outputHtml );
210 outputs.insert( QStringLiteral(
"OUTPUT" ), destId );
218 const double step = count > 0 ? 100.0 / count : 1;
219 long long current = 0;
237 const double cv = stat.
mean() != 0 ? stat.
stDev() / stat.
mean() : 0;
240 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
241 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
variety() );
242 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
243 outputs.insert( QStringLiteral(
"FILLED" ), count - stat.
countMissing() );
244 outputs.insert( QStringLiteral(
"MIN" ), stat.
min() );
245 outputs.insert( QStringLiteral(
"MAX" ), stat.
max() );
246 outputs.insert( QStringLiteral(
"RANGE" ), stat.
range() );
247 outputs.insert( QStringLiteral(
"SUM" ), stat.
sum() );
248 outputs.insert( QStringLiteral(
"MEAN" ), stat.
mean() );
249 outputs.insert( QStringLiteral(
"MEDIAN" ), stat.
median() );
250 outputs.insert( QStringLiteral(
"STD_DEV" ), stat.
stDev() );
251 outputs.insert( QStringLiteral(
"CV" ), cv );
252 outputs.insert( QStringLiteral(
"MINORITY" ), stat.
minority() );
253 outputs.insert( QStringLiteral(
"MAJORITY" ), stat.
majority() );
254 outputs.insert( QStringLiteral(
"FIRSTQUARTILE" ), stat.
firstQuartile() );
255 outputs.insert( QStringLiteral(
"THIRDQUARTILE" ), stat.
thirdQuartile() );
258 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
259 << QObject::tr(
"Unique values: %1" ).arg( stat.
variety() )
260 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
261 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
262 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
263 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
264 << QObject::tr(
"Range: %1" ).arg( stat.
range() )
265 << QObject::tr(
"Sum: %1" ).arg( stat.
sum(), 0,
'f' )
266 << QObject::tr(
"Mean value: %1" ).arg( stat.
mean(), 0,
'f' )
267 << QObject::tr(
"Median value: %1" ).arg( stat.
median(), 0,
'f' )
268 << QObject::tr(
"Standard deviation: %1" ).arg( stat.
stDev(), 0,
'f', 12 )
269 << QObject::tr(
"Coefficient of Variation: %1" ).arg( cv, 0,
'f' )
270 << QObject::tr(
"Minority (rarest occurring value): %1" ).arg( stat.
minority() )
271 << QObject::tr(
"Majority (most frequently occurring value): %1" ).arg( stat.
majority() )
272 << QObject::tr(
"First quartile: %1" ).arg( stat.
firstQuartile(), 0,
'f' )
273 << QObject::tr(
"Third quartile: %1" ).arg( stat.
thirdQuartile(), 0,
'f' )
280 << outputs.value( QStringLiteral(
"UNIQUE" ) )
281 << outputs.value( QStringLiteral(
"EMPTY" ) )
282 << outputs.value( QStringLiteral(
"FILLED" ) )
283 << outputs.value( QStringLiteral(
"MIN" ) )
284 << outputs.value( QStringLiteral(
"MAX" ) )
285 << outputs.value( QStringLiteral(
"RANGE" ) )
286 << outputs.value( QStringLiteral(
"SUM" ) )
287 << outputs.value( QStringLiteral(
"MEAN" ) )
288 << outputs.value( QStringLiteral(
"MEDIAN" ) )
289 << outputs.value( QStringLiteral(
"STD_DEV" ) )
290 << outputs.value( QStringLiteral(
"CV" ) )
291 << outputs.value( QStringLiteral(
"MINORITY" ) )
292 << outputs.value( QStringLiteral(
"MAJORITY" ) )
293 << outputs.value( QStringLiteral(
"FIRSTQUARTILE" ) )
294 << outputs.value( QStringLiteral(
"THIRDQUARTILE" ) )
295 << outputs.value( QStringLiteral(
"IQR" ) ) );
304 const double step = count > 0 ? 100.0 / count : 1;
305 long long current = 0;
324 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
325 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
countDistinct() );
326 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
327 outputs.insert( QStringLiteral(
"FILLED" ), stat.
count() - stat.
countMissing() );
330 outputs.insert( QStringLiteral(
"RANGE" ), stat.
range().
seconds() );
332 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
333 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
334 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
335 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( stat.
count() - stat.
countMissing() )
338 << QObject::tr(
"Range (seconds): %1" ).arg( stat.
range().
seconds() );
344 << outputs.value( QStringLiteral(
"UNIQUE" ) )
345 << outputs.value( QStringLiteral(
"EMPTY" ) )
346 << outputs.value( QStringLiteral(
"FILLED" ) )
347 << outputs.value( QStringLiteral(
"MIN" ) )
348 << outputs.value( QStringLiteral(
"MAX" ) )
349 << outputs.value( QStringLiteral(
"RANGE" ) ) );
358 const double step = count > 0 ? 100.0 / count : 1;
359 long long current = 0;
378 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
379 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
countDistinct() );
380 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
381 outputs.insert( QStringLiteral(
"FILLED" ), stat.
count() - stat.
countMissing() );
382 outputs.insert( QStringLiteral(
"MIN" ), stat.
min() );
383 outputs.insert( QStringLiteral(
"MAX" ), stat.
max() );
384 outputs.insert( QStringLiteral(
"MIN_LENGTH" ), stat.
minLength() );
385 outputs.insert( QStringLiteral(
"MAX_LENGTH" ), stat.
maxLength() );
386 outputs.insert( QStringLiteral(
"MEAN_LENGTH" ), stat.
meanLength() );
387 outputs.insert( QStringLiteral(
"MINORITY" ), stat.
minority() );
388 outputs.insert( QStringLiteral(
"MAJORITY" ), stat.
majority() );
390 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
391 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
392 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
393 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
394 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
395 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
396 << QObject::tr(
"Minimum length: %1" ).arg( stat.
minLength() )
397 << QObject::tr(
"Maximum length: %1" ).arg( stat.
maxLength() )
398 << QObject::tr(
"Mean length: %1" ).arg( stat.
meanLength(), 0,
'f' )
399 << QObject::tr(
"Minority: %1" ).arg( stat.
minority() )
400 << QObject::tr(
"Majority: %1" ).arg( stat.
majority() );
406 << outputs.value( QStringLiteral(
"UNIQUE" ) )
407 << outputs.value( QStringLiteral(
"EMPTY" ) )
408 << outputs.value( QStringLiteral(
"FILLED" ) )
409 << outputs.value( QStringLiteral(
"MIN" ) )
410 << outputs.value( QStringLiteral(
"MAX" ) )
411 << outputs.value( QStringLiteral(
"MIN_LENGTH" ) )
412 << outputs.value( QStringLiteral(
"MAX_LENGTH" ) )
413 << outputs.value( QStringLiteral(
"MEAN_LENGTH" ) )
414 << outputs.value( QStringLiteral(
"MINORITY" ) )
415 << outputs.value( QStringLiteral(
"MAJORITY" ) ) );
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
@ Max
Maximum (latest) datetime value.
@ Min
Minimum (earliest) datetime value.
This class represents a coordinate reference system (CRS).
Calculator for summary statistics and aggregates for a list of datetimes.
QVariant statistic(Qgis::DateTimeStatistic stat) const
Returns the value of a specified statistic.
QgsInterval range() const
Returns the range (interval between earliest and latest non-null datetime values).
void addValue(const QVariant &value)
Adds a single datetime to the statistics calculation.
void finalize()
Must be called after adding all datetimes with addValue() and before retrieving any calculated dateti...
int count() const
Returns the calculated count of values.
int countMissing() const
Returns the number of missing (null) datetime values.
int countDistinct() const
Returns the number of distinct datetime values.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
An interface for objects which accept features via addFeature(s) methods.
virtual bool addFeature(QgsFeature &feature, QgsFeatureSink::Flags flags=QgsFeatureSink::Flags())
Adds a single feature to the sink.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
QString displayString(const QVariant &v) const
Formats string for display.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
double seconds() const
Returns the interval duration in seconds.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
A generic file based destination parameter, for specifying the destination path for a file (non-map l...
Calculator for summary statistics for a list of doubles.
void addVariant(const QVariant &value)
Adds a single value to the statistics calculation.
double firstQuartile() const
Returns the first quartile of the values.
double sum() const
Returns calculated sum of values.
double mean() const
Returns calculated mean of values.
double majority() const
Returns majority of values.
int countMissing() const
Returns the number of missing (null) values.
double interQuartileRange() const
Returns the inter quartile range of the values.
double median() const
Returns calculated median of values.
double minority() const
Returns minority of values.
double min() const
Returns calculated minimum from values.
double stDev() const
Returns population standard deviation.
double thirdQuartile() const
Returns the third quartile of the values.
int count() const
Returns calculated count of values.
double range() const
Returns calculated range (difference between maximum and minimum values).
double max() const
Returns calculated maximum from values.
void finalize()
Must be called after adding all values with addValues() and before retrieving any calculated statisti...
int variety() const
Returns variety of values.
Calculator for summary statistics and aggregates for a list of strings.
QString max() const
Returns the maximum (non-null) string value.
QString min() const
Returns the minimum (non-null) string value.
int countMissing() const
Returns the number of missing (null) string values.
int count() const
Returns the calculated count of values.
int countDistinct() const
Returns the number of distinct string values.
void finalize()
Must be called after adding all strings with addString() and before retrieving any calculated string ...
void addValue(const QVariant &value)
Adds a single variant to the statistics calculation.
int minLength() const
Returns the minimum length of strings.
int maxLength() const
Returns the maximum length of strings.
QString majority() const
Returns the most common string.
QString minority() const
Returns the least common string.
double meanLength() const
Returns the mean length of strings.