26QString QgsBasicStatisticsAlgorithm::name()
const
28 return QStringLiteral(
"basicstatisticsforfields" );
31QString QgsBasicStatisticsAlgorithm::displayName()
const
33 return QObject::tr(
"Basic statistics for fields" );
36QStringList QgsBasicStatisticsAlgorithm::tags()
const
38 return QObject::tr(
"stats,statistics,date,time,datetime,string,number,text,table,layer,sum,maximum,minimum,mean,average,standard,deviation,count,distinct,unique,variance,median,quartile,range,majority,minority,summary" ).split(
',' );
41QString QgsBasicStatisticsAlgorithm::group()
const
43 return QObject::tr(
"Vector analysis" );
46QString QgsBasicStatisticsAlgorithm::groupId()
const
48 return QStringLiteral(
"vectoranalysis" );
51QString QgsBasicStatisticsAlgorithm::shortHelpString()
const
53 return QObject::tr(
"Generates basic statistics from the analysis of a values in a field in the attribute table of a vector layer. Numeric, date, time and string fields are supported. The statistics returned will depend on the field type." );
56QgsBasicStatisticsAlgorithm *QgsBasicStatisticsAlgorithm::createInstance()
const
58 return new QgsBasicStatisticsAlgorithm();
61void QgsBasicStatisticsAlgorithm::initAlgorithm(
const QVariantMap & )
64 addParameter(
new QgsProcessingParameterField( QStringLiteral(
"FIELD_NAME" ), QObject::tr(
"Field to calculate statistics on" ), QVariant(), QStringLiteral(
"INPUT_LAYER" ) ) );
66 addParameter(
new QgsProcessingParameterFileDestination( QStringLiteral(
"OUTPUT_HTML_FILE" ), QObject::tr(
"Statistics report" ), QObject::tr(
"'HTML files (*.html)" ), QVariant(),
true ) );
83 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"MINORITY" ), QObject::tr(
"Minority (rarest occurring value)" ) ) );
84 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"MAJORITY" ), QObject::tr(
"Majority (most frequently occurring value)" ) ) );
92 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral(
"INPUT_LAYER" ), context ) );
96 const QString fieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
97 const int fieldIndex = source->fields().lookupField( fieldName );
100 throw QgsProcessingException( QObject::tr(
"Invalid field for statistics: “%1” does not exist" ).arg( fieldName ) );
103 QgsField field = source->fields().at( fieldIndex );
105 QString outputHtml = parameterAsFileOutput( parameters, QStringLiteral(
"OUTPUT_HTML_FILE" ), context );
110 const long long count = source->featureCount();
113 fields.
append(
QgsField( QStringLiteral(
"count" ), QMetaType::Int ) );
114 fields.
append(
QgsField( QStringLiteral(
"unique" ), QMetaType::Int ) );
115 fields.
append(
QgsField( QStringLiteral(
"empty" ), QMetaType::Int ) );
116 fields.
append(
QgsField( QStringLiteral(
"filled" ), QMetaType::Int ) );
120 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::Double ) );
121 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::Double ) );
122 fields.
append(
QgsField( QStringLiteral(
"range" ), QMetaType::Double ) );
123 fields.
append(
QgsField( QStringLiteral(
"sum" ), QMetaType::Double ) );
124 fields.
append(
QgsField( QStringLiteral(
"mean" ), QMetaType::Double ) );
125 fields.
append(
QgsField( QStringLiteral(
"median" ), QMetaType::Double ) );
126 fields.
append(
QgsField( QStringLiteral(
"stddev" ), QMetaType::Double ) );
127 fields.
append(
QgsField( QStringLiteral(
"cv" ), QMetaType::Double ) );
128 fields.
append(
QgsField( QStringLiteral(
"minority" ), QMetaType::Double ) );
129 fields.
append(
QgsField( QStringLiteral(
"majority" ), QMetaType::Double ) );
130 fields.
append(
QgsField( QStringLiteral(
"q1" ), QMetaType::Double ) );
131 fields.
append(
QgsField( QStringLiteral(
"q3" ), QMetaType::Double ) );
132 fields.
append(
QgsField( QStringLiteral(
"iqr" ), QMetaType::Double ) );
136 if ( field.
type() == QMetaType::Type::QDate )
138 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QDate ) );
139 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QDate ) );
141 else if ( field.
type() == QMetaType::Type::QTime )
143 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QTime ) );
144 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QTime ) );
148 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QDateTime ) );
149 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QDateTime ) );
151 fields.
append(
QgsField( QStringLiteral(
"range" ), QMetaType::Double ) );
155 fields.
append(
QgsField( QStringLiteral(
"min" ), QMetaType::QString ) );
156 fields.
append(
QgsField( QStringLiteral(
"max" ), QMetaType::QString ) );
157 fields.
append(
QgsField( QStringLiteral(
"min_length" ), QMetaType::Double ) );
158 fields.
append(
QgsField( QStringLiteral(
"max_length" ), QMetaType::Double ) );
159 fields.
append(
QgsField( QStringLiteral(
"mean_length" ), QMetaType::Double ) );
160 fields.
append(
QgsField( QStringLiteral(
"minority" ), QMetaType::QString ) );
161 fields.
append(
QgsField( QStringLiteral(
"majority" ), QMetaType::QString ) );
166 if ( parameters.value( QStringLiteral(
"OUTPUT" ) ).isValid() && !sink )
170 data << QObject::tr(
"Analyzed field: %1" ).arg( fieldName );
176 outputs = calculateNumericStatistics( fieldIndex, features, count, sink.get(), data, feedback );
180 outputs = calculateDateTimeStatistics( fieldIndex, field, features, count, sink.get(), data, feedback );
184 outputs = calculateStringStatistics( fieldIndex, features, count, sink.get(), data, feedback );
189 if ( !outputHtml.isEmpty() )
191 QFile file( outputHtml );
192 if ( file.open( QIODevice::WriteOnly | QIODevice::Truncate ) )
194 QTextStream out( &file );
195#if QT_VERSION < QT_VERSION_CHECK( 6, 0, 0 )
196 out.setCodec(
"UTF-8" );
198 out << QStringLiteral(
"<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"/></head><body>\n" );
199 for (
const QString &s : data )
201 out << QStringLiteral(
"<p>%1</p>" ).arg( s );
203 out << QStringLiteral(
"</body></html>" );
205 outputs.insert( QStringLiteral(
"OUTPUT_HTML_FILE" ), outputHtml );
211 outputs.insert( QStringLiteral(
"OUTPUT" ), destId );
219 const double step = count > 0 ? 100.0 / count : 1;
220 long long current = 0;
238 const double cv = stat.
mean() != 0 ? stat.
stDev() / stat.
mean() : 0;
241 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
242 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
variety() );
243 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
244 outputs.insert( QStringLiteral(
"FILLED" ), count - stat.
countMissing() );
245 outputs.insert( QStringLiteral(
"MIN" ), stat.
min() );
246 outputs.insert( QStringLiteral(
"MAX" ), stat.
max() );
247 outputs.insert( QStringLiteral(
"RANGE" ), stat.
range() );
248 outputs.insert( QStringLiteral(
"SUM" ), stat.
sum() );
249 outputs.insert( QStringLiteral(
"MEAN" ), stat.
mean() );
250 outputs.insert( QStringLiteral(
"MEDIAN" ), stat.
median() );
251 outputs.insert( QStringLiteral(
"STD_DEV" ), stat.
stDev() );
252 outputs.insert( QStringLiteral(
"CV" ), cv );
253 outputs.insert( QStringLiteral(
"MINORITY" ), stat.
minority() );
254 outputs.insert( QStringLiteral(
"MAJORITY" ), stat.
majority() );
255 outputs.insert( QStringLiteral(
"FIRSTQUARTILE" ), stat.
firstQuartile() );
256 outputs.insert( QStringLiteral(
"THIRDQUARTILE" ), stat.
thirdQuartile() );
259 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
260 << QObject::tr(
"Unique values: %1" ).arg( stat.
variety() )
261 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
262 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
263 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
264 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
265 << QObject::tr(
"Range: %1" ).arg( stat.
range() )
266 << QObject::tr(
"Sum: %1" ).arg( stat.
sum(), 0,
'f' )
267 << QObject::tr(
"Mean value: %1" ).arg( stat.
mean(), 0,
'f' )
268 << QObject::tr(
"Median value: %1" ).arg( stat.
median(), 0,
'f' )
269 << QObject::tr(
"Standard deviation: %1" ).arg( stat.
stDev(), 0,
'f', 12 )
270 << QObject::tr(
"Coefficient of Variation: %1" ).arg( cv, 0,
'f' )
271 << QObject::tr(
"Minority (rarest occurring value): %1" ).arg( stat.
minority() )
272 << QObject::tr(
"Majority (most frequently occurring value): %1" ).arg( stat.
majority() )
273 << QObject::tr(
"First quartile: %1" ).arg( stat.
firstQuartile(), 0,
'f' )
274 << QObject::tr(
"Third quartile: %1" ).arg( stat.
thirdQuartile(), 0,
'f' )
280 f.
setAttributes(
QgsAttributes() << outputs.value( QStringLiteral(
"COUNT" ) ) << outputs.value( QStringLiteral(
"UNIQUE" ) ) << outputs.value( QStringLiteral(
"EMPTY" ) ) << outputs.value( QStringLiteral(
"FILLED" ) ) << outputs.value( QStringLiteral(
"MIN" ) ) << outputs.value( QStringLiteral(
"MAX" ) ) << outputs.value( QStringLiteral(
"RANGE" ) ) << outputs.value( QStringLiteral(
"SUM" ) ) << outputs.value( QStringLiteral(
"MEAN" ) ) << outputs.value( QStringLiteral(
"MEDIAN" ) ) << outputs.value( QStringLiteral(
"STD_DEV" ) ) << outputs.value( QStringLiteral(
"CV" ) ) << outputs.value( QStringLiteral(
"MINORITY" ) ) << outputs.value( QStringLiteral(
"MAJORITY" ) ) << outputs.value( QStringLiteral(
"FIRSTQUARTILE" ) ) << outputs.value( QStringLiteral(
"THIRDQUARTILE" ) ) << outputs.value( QStringLiteral(
"IQR" ) ) );
289 const double step = count > 0 ? 100.0 / count : 1;
290 long long current = 0;
309 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
310 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
countDistinct() );
311 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
312 outputs.insert( QStringLiteral(
"FILLED" ), stat.
count() - stat.
countMissing() );
315 outputs.insert( QStringLiteral(
"RANGE" ), stat.
range().
seconds() );
317 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
318 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
319 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
320 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( stat.
count() - stat.
countMissing() )
323 << QObject::tr(
"Range (seconds): %1" ).arg( stat.
range().
seconds() );
328 f.
setAttributes(
QgsAttributes() << outputs.value( QStringLiteral(
"COUNT" ) ) << outputs.value( QStringLiteral(
"UNIQUE" ) ) << outputs.value( QStringLiteral(
"EMPTY" ) ) << outputs.value( QStringLiteral(
"FILLED" ) ) << outputs.value( QStringLiteral(
"MIN" ) ) << outputs.value( QStringLiteral(
"MAX" ) ) << outputs.value( QStringLiteral(
"RANGE" ) ) );
337 const double step = count > 0 ? 100.0 / count : 1;
338 long long current = 0;
357 outputs.insert( QStringLiteral(
"COUNT" ), stat.
count() );
358 outputs.insert( QStringLiteral(
"UNIQUE" ), stat.
countDistinct() );
359 outputs.insert( QStringLiteral(
"EMPTY" ), stat.
countMissing() );
360 outputs.insert( QStringLiteral(
"FILLED" ), stat.
count() - stat.
countMissing() );
361 outputs.insert( QStringLiteral(
"MIN" ), stat.
min() );
362 outputs.insert( QStringLiteral(
"MAX" ), stat.
max() );
363 outputs.insert( QStringLiteral(
"MIN_LENGTH" ), stat.
minLength() );
364 outputs.insert( QStringLiteral(
"MAX_LENGTH" ), stat.
maxLength() );
365 outputs.insert( QStringLiteral(
"MEAN_LENGTH" ), stat.
meanLength() );
366 outputs.insert( QStringLiteral(
"MINORITY" ), stat.
minority() );
367 outputs.insert( QStringLiteral(
"MAJORITY" ), stat.
majority() );
369 data << QObject::tr(
"Count: %1" ).arg( stat.
count() )
370 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
371 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
372 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
373 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
374 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
375 << QObject::tr(
"Minimum length: %1" ).arg( stat.
minLength() )
376 << QObject::tr(
"Maximum length: %1" ).arg( stat.
maxLength() )
377 << QObject::tr(
"Mean length: %1" ).arg( stat.
meanLength(), 0,
'f' )
378 << QObject::tr(
"Minority: %1" ).arg( stat.
minority() )
379 << QObject::tr(
"Majority: %1" ).arg( stat.
majority() );
384 f.
setAttributes(
QgsAttributes() << outputs.value( QStringLiteral(
"COUNT" ) ) << outputs.value( QStringLiteral(
"UNIQUE" ) ) << outputs.value( QStringLiteral(
"EMPTY" ) ) << outputs.value( QStringLiteral(
"FILLED" ) ) << outputs.value( QStringLiteral(
"MIN" ) ) << outputs.value( QStringLiteral(
"MAX" ) ) << outputs.value( QStringLiteral(
"MIN_LENGTH" ) ) << outputs.value( QStringLiteral(
"MAX_LENGTH" ) ) << outputs.value( QStringLiteral(
"MEAN_LENGTH" ) ) << outputs.value( QStringLiteral(
"MINORITY" ) ) << outputs.value( QStringLiteral(
"MAJORITY" ) ) );
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
@ Max
Maximum (latest) datetime value.
@ Min
Minimum (earliest) datetime value.
This class represents a coordinate reference system (CRS).
Calculator for summary statistics and aggregates for a list of datetimes.
QVariant statistic(Qgis::DateTimeStatistic stat) const
Returns the value of a specified statistic.
QgsInterval range() const
Returns the range (interval between earliest and latest non-null datetime values).
void addValue(const QVariant &value)
Adds a single datetime to the statistics calculation.
void finalize()
Must be called after adding all datetimes with addValue() and before retrieving any calculated dateti...
int count() const
Returns the calculated count of values.
int countMissing() const
Returns the number of missing (null) datetime values.
int countDistinct() const
Returns the number of distinct datetime values.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
An interface for objects which accept features via addFeature(s) methods.
virtual bool addFeature(QgsFeature &feature, QgsFeatureSink::Flags flags=QgsFeatureSink::Flags())
Adds a single feature to the sink.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
QString displayString(const QVariant &v) const
Formats string for display.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
double seconds() const
Returns the interval duration in seconds.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
A generic file based destination parameter, for specifying the destination path for a file (non-map l...
Calculator for summary statistics for a list of doubles.
void addVariant(const QVariant &value)
Adds a single value to the statistics calculation.
double firstQuartile() const
Returns the first quartile of the values.
double sum() const
Returns calculated sum of values.
double mean() const
Returns calculated mean of values.
double majority() const
Returns majority of values.
int countMissing() const
Returns the number of missing (null) values.
double interQuartileRange() const
Returns the inter quartile range of the values.
double median() const
Returns calculated median of values.
double minority() const
Returns minority of values.
double min() const
Returns calculated minimum from values.
double stDev() const
Returns population standard deviation.
double thirdQuartile() const
Returns the third quartile of the values.
int count() const
Returns calculated count of values.
double range() const
Returns calculated range (difference between maximum and minimum values).
double max() const
Returns calculated maximum from values.
void finalize()
Must be called after adding all values with addValues() and before retrieving any calculated statisti...
int variety() const
Returns variety of values.
Calculator for summary statistics and aggregates for a list of strings.
QString max() const
Returns the maximum (non-null) string value.
QString min() const
Returns the minimum (non-null) string value.
int countMissing() const
Returns the number of missing (null) string values.
int count() const
Returns the calculated count of values.
int countDistinct() const
Returns the number of distinct string values.
void finalize()
Must be called after adding all strings with addString() and before retrieving any calculated string ...
void addValue(const QVariant &value)
Adds a single variant to the statistics calculation.
int minLength() const
Returns the minimum length of strings.
int maxLength() const
Returns the maximum length of strings.
QString majority() const
Returns the most common string.
QString minority() const
Returns the least common string.
double meanLength() const
Returns the mean length of strings.