26using namespace Qt::StringLiterals;
30QString QgsBasicStatisticsAlgorithm::name()
const
32 return u
"basicstatisticsforfields"_s;
35QString QgsBasicStatisticsAlgorithm::displayName()
const
37 return QObject::tr(
"Basic statistics for fields" );
// Keyword list for this algorithm, declared as one translatable,
// comma-separated string.
// NOTE(review): the statement is cut off in this listing after the tr() call;
// how the translated string becomes the QStringList return value is not
// visible here - confirm against the full source before editing.
40QStringList QgsBasicStatisticsAlgorithm::tags()
const
42 return QObject::tr(
"stats,statistics,date,time,datetime,string,number,text,table,layer,sum,maximum,minimum,mean,average,standard,deviation,count,distinct,unique,variance,median,quartile,range,majority,minority,summary" )
46QString QgsBasicStatisticsAlgorithm::group()
const
48 return QObject::tr(
"Vector analysis" );
51QString QgsBasicStatisticsAlgorithm::groupId()
const
53 return u
"vectoranalysis"_s;
// Help text for this algorithm: states that numeric, date, time and string
// fields are supported and that the reported statistics depend on the field
// type.
// NOTE(review): only the two adjacent string literals are visible in this
// listing; the statement that wraps and returns them is not shown - confirm
// against the full source.
56QString QgsBasicStatisticsAlgorithm::shortHelpString()
const
59 "This algorithm generates basic statistics from the analysis of values in a field in the attribute table of a vector layer. Numeric, date, time and string fields are supported. The statistics "
60 "returned will depend on the field type."
64QString QgsBasicStatisticsAlgorithm::shortDescription()
const
66 return QObject::tr(
"Generates basic statistics from the values in a field of a vector layer." );
69QgsBasicStatisticsAlgorithm *QgsBasicStatisticsAlgorithm::createInstance()
const
71 return new QgsBasicStatisticsAlgorithm();
// Declares the inputs of the algorithm. The QVariantMap argument is unnamed
// and therefore unused.
// Only one declaration is visible in this listing: the FIELD_NAME field
// parameter, with an empty QVariant() default and INPUT_LAYER given as the
// name of its parent layer parameter. Other declarations made by this
// function (if any) fall on lines omitted from the listing.
74void QgsBasicStatisticsAlgorithm::initAlgorithm(
const QVariantMap & )
77 addParameter(
new QgsProcessingParameterField( u
"FIELD_NAME"_s, QObject::tr(
"Field to calculate statistics on" ), QVariant(), u
"INPUT_LAYER"_s ) );
// Fragment of a function body whose signature is not visible in this listing
// (presumably the processing entry point of the algorithm - TODO confirm).
// Many interior lines are omitted, including braces and the declarations of
// sink, features, data, outputs and destId.
//
// Visible flow: resolve the input source and the field, pick a statistics
// helper, optionally write the collected text lines as an HTML report, and
// record the output locations in the outputs map.
// Resolve the INPUT_LAYER parameter into a feature source owned by this scope.
105 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u
"INPUT_LAYER"_s, context ) );
// Look the requested field up by name; a negative index means it was not found.
109 const QString fieldName = parameterAsString( parameters, u
"FIELD_NAME"_s, context );
110 const int fieldIndex = source->fields().lookupField( fieldName );
111 if ( fieldIndex < 0 )
113 throw QgsProcessingException( QObject::tr(
"Invalid field for statistics: “%1” does not exist" ).arg( fieldName ) );
116 QgsField field = source->fields().at( fieldIndex );
118 QString outputHtml = parameterAsFileOutput( parameters, u
"OUTPUT_HTML_FILE"_s, context );
// Feature count as reported by the source; passed on to the helpers below.
123 const long long count = source->featureCount();
// Branch on the field type (date, then time); the branch bodies are not visible.
149 if ( field.
type() == QMetaType::Type::QDate )
154 else if ( field.
type() == QMetaType::Type::QTime )
// True when an OUTPUT value was supplied but no sink exists; the consequence
// of this condition is on an omitted line.
179 if ( parameters.value( u
"OUTPUT"_s ).isValid() && !sink )
183 data << QObject::tr(
"Analyzed field: %1" ).arg( fieldName );
// One of the three helpers fills outputs; the conditions selecting between
// them are not visible. Only the datetime helper also receives the field.
189 outputs = calculateNumericStatistics( parameters, fieldIndex, features, count, sink.get(), data, feedback );
193 outputs = calculateDateTimeStatistics( parameters, fieldIndex, field, features, count, sink.get(), data, feedback );
197 outputs = calculateStringStatistics( parameters, fieldIndex, features, count, sink.get(), data, feedback );
// HTML report: written only when a destination path was provided and the
// file could be opened for writing (existing content is truncated).
202 if ( !outputHtml.isEmpty() )
204 QFile file( outputHtml );
205 if ( file.open( QIODevice::WriteOnly | QIODevice::Truncate ) )
207 QTextStream out( &file );
208 out << u
"<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"/></head><body>\n"_s;
// Each collected line becomes one paragraph.
// NOTE(review): the text is placed into the markup as-is; no HTML escaping
// is visible in this listing - confirm whether values can contain markup.
209 for (
const QString &s : data )
211 out << u
"<p>%1</p>"_s.arg( s );
213 out << u
"</body></html>"_s;
215 outputs.insert( u
"OUTPUT_HTML_FILE"_s, outputHtml );
221 outputs.insert( u
"OUTPUT"_s, destId );
// Fragment of calculateNumericStatistics, which returns a QVariantMap.
// The parameter list, the feature loop and the declarations of stat and
// outputs are on lines omitted from this listing.
//
// Visible work: derive a coefficient of variation, store the statistics in
// outputs under fixed keys, build translated summary lines, and read the
// stored values back in a fixed order.
227QVariantMap QgsBasicStatisticsAlgorithm::calculateNumericStatistics(
// Percentage represented by one feature; falls back to 1 when count is not
// positive. Presumably used for progress reporting - the use is not visible.
231 const double step = count > 0 ? 100.0 / count : 1;
232 long long current = 0;
// Coefficient of variation: stDev / mean, forced to 0 when the mean is
// exactly 0 so that no division by zero takes place.
250 const double cv = stat.
mean() != 0 ? stat.
stDev() / stat.
mean() : 0;
// Store the statistics under their output keys.
253 outputs.insert( u
"COUNT"_s, stat.
count() );
254 outputs.insert( u
"UNIQUE"_s, stat.
variety() );
// FILLED is derived from the count argument, not from stat.count().
256 outputs.insert( u
"FILLED"_s, count - stat.
countMissing() );
257 outputs.insert( u
"MIN"_s, stat.
min() );
258 outputs.insert( u
"MAX"_s, stat.
max() );
259 outputs.insert( u
"RANGE"_s, stat.
range() );
260 outputs.insert( u
"SUM"_s, stat.
sum() );
261 outputs.insert( u
"MEAN"_s, stat.
mean() );
262 outputs.insert( u
"MEDIAN"_s, stat.
median() );
263 outputs.insert( u
"STD_DEV"_s, stat.
stDev() );
264 outputs.insert( u
"CV"_s, cv );
265 outputs.insert( u
"MINORITY"_s, stat.
minority() );
266 outputs.insert( u
"MAJORITY"_s, stat.
majority() );
// Translated summary lines; the object they are streamed into is on an
// omitted line.
272 << QObject::tr(
"Count: %1" ).arg( stat.
count() )
273 << QObject::tr(
"Unique values: %1" ).arg( stat.
variety() )
274 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
275 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
276 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
277 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
278 << QObject::tr(
"Range: %1" ).arg( stat.
range() )
279 << QObject::tr(
"Sum: %1" ).arg( stat.
sum(), 0,
'f' )
280 << QObject::tr(
"Mean value: %1" ).arg( stat.
mean(), 0,
'f' )
281 << QObject::tr(
"Median value: %1" ).arg( stat.
median(), 0,
'f' )
// The standard deviation is the only entry given an explicit precision (12).
282 << QObject::tr(
"Standard deviation: %1" ).arg( stat.
stDev(), 0,
'f', 12 )
283 << QObject::tr(
"Coefficient of Variation: %1" ).arg( cv, 0,
'f' )
284 << QObject::tr(
"Minority (rarest occurring value): %1" ).arg( stat.
minority() )
285 << QObject::tr(
"Majority (most frequently occurring value): %1" ).arg( stat.
majority() )
286 << QObject::tr(
"First quartile: %1" ).arg( stat.
firstQuartile(), 0,
'f' )
287 << QObject::tr(
"Third quartile: %1" ).arg( stat.
thirdQuartile(), 0,
'f' )
// Stored values are read back in this fixed order; the receiving object is
// on an omitted line.
// NOTE(review): EMPTY, FIRSTQUARTILE, THIRDQUARTILE and IQR are read here,
// but their insert() calls are not visible in this listing - TODO confirm
// they are set on the omitted lines.
295 << outputs.value( u
"COUNT"_s )
296 << outputs.value( u
"UNIQUE"_s )
297 << outputs.value( u
"EMPTY"_s )
298 << outputs.value( u
"FILLED"_s )
299 << outputs.value( u
"MIN"_s )
300 << outputs.value( u
"MAX"_s )
301 << outputs.value( u
"RANGE"_s )
302 << outputs.value( u
"SUM"_s )
303 << outputs.value( u
"MEAN"_s )
304 << outputs.value( u
"MEDIAN"_s )
305 << outputs.value( u
"STD_DEV"_s )
306 << outputs.value( u
"CV"_s )
307 << outputs.value( u
"MINORITY"_s )
308 << outputs.value( u
"MAJORITY"_s )
309 << outputs.value( u
"FIRSTQUARTILE"_s )
310 << outputs.value( u
"THIRDQUARTILE"_s )
311 << outputs.value( u
"IQR"_s )
// Fragment of calculateDateTimeStatistics, which returns a QVariantMap.
// The parameter list, the feature loop and most output assignments are on
// lines omitted from this listing.
//
// Visible work: store the value count, build translated summary lines
// (including the range expressed in seconds), and read stored values back in
// a fixed order.
322QVariantMap QgsBasicStatisticsAlgorithm::calculateDateTimeStatistics(
// Percentage represented by one feature; falls back to 1 when count is not
// positive. Presumably used for progress reporting - the use is not visible.
326 const double step = count > 0 ? 100.0 / count : 1;
327 long long current = 0;
346 outputs.insert( u
"COUNT"_s, stat.
count() );
// Translated summary lines; the object they are streamed into is on an
// omitted line.
355 << QObject::tr(
"Count: %1" ).arg( stat.
count() )
356 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
357 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
// NOTE(review): the filled count here is derived from stat.count(), whereas
// the numeric and string variants in this file use the count argument -
// confirm which is intended.
358 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( stat.
count() - stat.
countMissing() )
// The range is an interval and is reported as a number of seconds.
361 << QObject::tr(
"Range (seconds): %1" ).arg( stat.
range().
seconds() );
// Stored values are read back in this fixed order; the receiving object is
// on an omitted line. Only the COUNT insert is visible above - TODO confirm
// the remaining keys are set on the omitted lines.
368 << outputs.value( u
"COUNT"_s )
369 << outputs.value( u
"UNIQUE"_s )
370 << outputs.value( u
"EMPTY"_s )
371 << outputs.value( u
"FILLED"_s )
372 << outputs.value( u
"MIN"_s )
373 << outputs.value( u
"MAX"_s )
374 << outputs.value( u
"RANGE"_s )
// Fragment of calculateStringStatistics, which returns a QVariantMap.
// The parameter list, the feature loop and some output assignments are on
// lines omitted from this listing.
//
// Visible work: store value, length and frequency statistics in outputs,
// build translated summary lines, and read the stored values back in a fixed
// order.
385QVariantMap QgsBasicStatisticsAlgorithm::calculateStringStatistics(
// Percentage represented by one feature; falls back to 1 when count is not
// positive. Presumably used for progress reporting - the use is not visible.
389 const double step = count > 0 ? 100.0 / count : 1;
390 long long current = 0;
// Store the statistics under their output keys.
409 outputs.insert( u
"COUNT"_s, stat.
count() );
413 outputs.insert( u
"MIN"_s, stat.
min() );
414 outputs.insert( u
"MAX"_s, stat.
max() );
415 outputs.insert( u
"MIN_LENGTH"_s, stat.
minLength() );
416 outputs.insert( u
"MAX_LENGTH"_s, stat.
maxLength() );
417 outputs.insert( u
"MEAN_LENGTH"_s, stat.
meanLength() );
418 outputs.insert( u
"MINORITY"_s, stat.
minority() );
419 outputs.insert( u
"MAJORITY"_s, stat.
majority() );
// Translated summary lines; the object they are streamed into is on an
// omitted line.
422 << QObject::tr(
"Count: %1" ).arg( stat.
count() )
423 << QObject::tr(
"Unique values: %1" ).arg( stat.
countDistinct() )
424 << QObject::tr(
"NULL (missing) values: %1" ).arg( stat.
countMissing() )
// The filled count is derived from the count argument, not from stat.count().
425 << QObject::tr(
"NOT NULL (filled) values: %1" ).arg( count - stat.
countMissing() )
426 << QObject::tr(
"Minimum value: %1" ).arg( stat.
min() )
427 << QObject::tr(
"Maximum value: %1" ).arg( stat.
max() )
428 << QObject::tr(
"Minimum length: %1" ).arg( stat.
minLength() )
429 << QObject::tr(
"Maximum length: %1" ).arg( stat.
maxLength() )
430 << QObject::tr(
"Mean length: %1" ).arg( stat.
meanLength(), 0,
'f' )
431 << QObject::tr(
"Minority: %1" ).arg( stat.
minority() )
432 << QObject::tr(
"Majority: %1" ).arg( stat.
majority() );
// Stored values are read back in this fixed order; the receiving object is
// on an omitted line.
// NOTE(review): UNIQUE, EMPTY and FILLED are read here, but their insert()
// calls are not visible in this listing - TODO confirm they are set on the
// omitted lines.
439 << outputs.value( u
"COUNT"_s )
440 << outputs.value( u
"UNIQUE"_s )
441 << outputs.value( u
"EMPTY"_s )
442 << outputs.value( u
"FILLED"_s )
443 << outputs.value( u
"MIN"_s )
444 << outputs.value( u
"MAX"_s )
445 << outputs.value( u
"MIN_LENGTH"_s )
446 << outputs.value( u
"MAX_LENGTH"_s )
447 << outputs.value( u
"MEAN_LENGTH"_s )
448 << outputs.value( u
"MINORITY"_s )
449 << outputs.value( u
"MAJORITY"_s )
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink has no geometry.
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always require invalid geometries, regardless of any user settings (e.g. "repair geometry" type algorithms).
@ Max
Maximum (latest) datetime value.
@ Min
Minimum (earliest) datetime value.
Represents a coordinate reference system (CRS).
Calculator for summary statistics and aggregates for a list of datetimes.
QVariant statistic(Qgis::DateTimeStatistic stat) const
Returns the value of a specified statistic.
QgsInterval range() const
Returns the range (interval between earliest and latest non-null datetime values).
void addValue(const QVariant &value)
Adds a single datetime to the statistics calculation.
void finalize()
Must be called after adding all datetimes with addValue() and before retrieving any calculated datetime statistics.
int count() const
Returns the calculated count of values.
int countMissing() const
Returns the number of missing (null) datetime values.
int countDistinct() const
Returns the number of distinct datetime values.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
An interface for objects which accept features via addFeature(s) methods.
virtual bool addFeature(QgsFeature &feature, QgsFeatureSink::Flags flags=QgsFeatureSink::Flags())
Adds a single feature to the sink.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field/value attributes.
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
QString displayString(const QVariant &v) const
Formats string for display.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
double seconds() const
Returns the interval duration in seconds.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
A generic file based destination parameter, for specifying the destination path for a file (non-map layer) created by the algorithm.
Calculator for summary statistics for a list of doubles.
void addVariant(const QVariant &value)
Adds a single value to the statistics calculation.
double firstQuartile() const
Returns the first quartile of the values.
double sum() const
Returns calculated sum of values.
double mean() const
Returns calculated mean of values.
double majority() const
Returns majority of values.
int countMissing() const
Returns the number of missing (null) values.
double interQuartileRange() const
Returns the inter quartile range of the values.
double median() const
Returns calculated median of values.
double minority() const
Returns minority of values.
double min() const
Returns calculated minimum from values.
double stDev() const
Returns population standard deviation.
double thirdQuartile() const
Returns the third quartile of the values.
int count() const
Returns calculated count of values.
double range() const
Returns calculated range (difference between maximum and minimum values).
double max() const
Returns calculated maximum from values.
void finalize()
Must be called after adding all values with addValues() and before retrieving any calculated statistics.
int variety() const
Returns variety of values.
Calculator for summary statistics and aggregates for a list of strings.
QString max() const
Returns the maximum (non-null) string value.
QString min() const
Returns the minimum (non-null) string value.
int countMissing() const
Returns the number of missing (null) string values.
int count() const
Returns the calculated count of values.
int countDistinct() const
Returns the number of distinct string values.
void finalize()
Must be called after adding all strings with addString() and before retrieving any calculated string statistics.
void addValue(const QVariant &value)
Adds a single variant to the statistics calculation.
int minLength() const
Returns the minimum length of strings.
int maxLength() const
Returns the maximum length of strings.
QString majority() const
Returns the most common string.
QString minority() const
Returns the least common string.
double meanLength() const
Returns the mean length of strings.