QGIS API Documentation 4.1.0-Master (ca2ac17535b)
Loading...
Searching...
No Matches
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdbscanclustering.cpp
3 ---------------------
4 begin : July 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include <unordered_set>
21
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
29
30QString QgsDbscanClusteringAlgorithm::name() const
31{
32 return u"dbscanclustering"_s;
33}
34
35QString QgsDbscanClusteringAlgorithm::displayName() const
36{
37 return QObject::tr( "DBSCAN clustering" );
38}
39
40QString QgsDbscanClusteringAlgorithm::shortDescription() const
41{
42 return QObject::tr( "Clusters point features using a density based scan algorithm." );
43}
44
45QStringList QgsDbscanClusteringAlgorithm::tags() const
46{
47 return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' );
48}
49
50QString QgsDbscanClusteringAlgorithm::group() const
51{
52 return QObject::tr( "Vector analysis" );
53}
54
55QString QgsDbscanClusteringAlgorithm::groupId() const
56{
57 return u"vectoranalysis"_s;
58}
59
60void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
61{
62 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::VectorPoint ) ) );
63 addParameter( new QgsProcessingParameterNumber( u"MIN_SIZE"_s, QObject::tr( "Minimum cluster size" ), Qgis::ProcessingNumberParameterType::Integer, 5, false, 1 ) );
64 addParameter( new QgsProcessingParameterDistance( u"EPS"_s, QObject::tr( "Maximum distance between clustered points" ), 1, u"INPUT"_s, false, 0 ) );
65
66 auto dbscanStarParam = std::make_unique<QgsProcessingParameterBoolean>( u"DBSCAN*"_s, QObject::tr( "Treat border points as noise (DBSCAN*)" ), false );
67 dbscanStarParam->setFlags( dbscanStarParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
68 addParameter( dbscanStarParam.release() );
69
70 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( u"FIELD_NAME"_s, QObject::tr( "Cluster field name" ), u"CLUSTER_ID"_s );
71 fieldNameParam->setFlags( fieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
72 addParameter( fieldNameParam.release() );
73 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( u"SIZE_FIELD_NAME"_s, QObject::tr( "Cluster size field name" ), u"CLUSTER_SIZE"_s );
74 sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
75 addParameter( sizeFieldNameParam.release() );
76
77 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Clusters" ), Qgis::ProcessingSourceType::VectorPoint ) );
78
79 addOutput( new QgsProcessingOutputNumber( u"NUM_CLUSTERS"_s, QObject::tr( "Number of clusters" ) ) );
80}
81
82QString QgsDbscanClusteringAlgorithm::shortHelpString() const
83{
84 return QObject::tr(
85 "This algorithm clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
86 "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)."
87 );
88}
89
90QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
91{
92 return new QgsDbscanClusteringAlgorithm();
93}
94
95struct KDBushDataEqualById
96{
97 bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const { return a.id == b.id; }
98};
99
100struct KDBushDataHashById
101{
102 std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const { return std::hash<QgsFeatureId> {}( a.id ); }
103};
104
105QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
106{
107 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u"INPUT"_s, context ) );
108 if ( !source )
109 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
110
111 const std::size_t minSize = static_cast<std::size_t>( parameterAsInt( parameters, u"MIN_SIZE"_s, context ) );
112 const double eps1 = parameterAsDouble( parameters, u"EPS"_s, context );
113 const double eps2 = parameterAsDouble( parameters, u"EPS2"_s, context );
114 const bool borderPointsAreNoise = parameterAsBoolean( parameters, u"DBSCAN*"_s, context );
115
116 QgsFields outputFields = source->fields();
117 QgsFields newFields;
118 const QString clusterFieldName = parameterAsString( parameters, u"FIELD_NAME"_s, context );
119 newFields.append( QgsField( clusterFieldName, QMetaType::Type::Int ) );
120 const QString clusterSizeFieldName = parameterAsString( parameters, u"SIZE_FIELD_NAME"_s, context );
121 newFields.append( QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
122 outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
123
124 QString dest;
125 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, u"OUTPUT"_s, context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
126 if ( !sink )
127 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
128
129 QgsFeatureRequest indexRequest;
130
131 std::unordered_map<QgsFeatureId, QDateTime> idToDateTime;
132 const QString dateTimeFieldName = parameterAsString( parameters, u"DATETIME_FIELD"_s, context );
133 int dateTimefieldIndex = -1;
134 if ( !dateTimeFieldName.isEmpty() )
135 {
136 dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName );
137 if ( dateTimefieldIndex == -1 )
138 throw QgsProcessingException( QObject::tr( "Datetime field missing" ) );
139
140 indexRequest.setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex );
141 }
142 else
143 {
144 indexRequest.setNoAttributes();
145 }
146
147 // build spatial index, also collecting feature datetimes if required
148 feedback->pushInfo( QObject::tr( "Building spatial index" ) );
149 QgsFeatureIterator indexIterator = source->getFeatures( indexRequest );
151 indexIterator,
152 [&idToDateTime, dateTimefieldIndex]( const QgsFeature &feature ) -> bool {
153 if ( dateTimefieldIndex >= 0 )
154 idToDateTime[feature.id()] = feature.attributes().at( dateTimefieldIndex ).toDateTime();
155 return true;
156 },
157 feedback
158 );
159
160 if ( feedback->isCanceled() )
161 return QVariantMap();
162
163 // stdbscan!
164 feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
165 std::unordered_map<QgsFeatureId, int> idToCluster;
166 idToCluster.reserve( index.size() );
167 const long featureCount = source->featureCount();
168 QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
169 stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback );
170
171 // cluster size
172 std::unordered_map<int, int> clusterSize;
173 std::for_each( idToCluster.begin(), idToCluster.end(), [&clusterSize]( std::pair<QgsFeatureId, int> idCluster ) { clusterSize[idCluster.second]++; } );
174
175 // write clusters
176 const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
177 features = source->getFeatures();
178 int i = 0;
179 QgsFeature feat;
180 while ( features.nextFeature( feat ) )
181 {
182 i++;
183 if ( feedback->isCanceled() )
184 {
185 break;
186 }
187
188 feedback->setProgress( 90 + i * writeStep );
189 QgsAttributes attr = feat.attributes();
190 const auto cluster = idToCluster.find( feat.id() );
191 if ( cluster != idToCluster.end() )
192 {
193 attr << cluster->second << clusterSize[cluster->second];
194 }
195 else
196 {
197 attr << QVariant() << QVariant();
198 }
199 feat.setAttributes( attr );
200 if ( !sink->addFeature( feat, QgsFeatureSink::FastInsert ) )
201 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, u"OUTPUT"_s ) );
202 else
203 feedback->featureAddedToSink( u"OUTPUT"_s );
204 }
205
206 sink->finalize();
207 feedback->featureSinkFinalized( u"OUTPUT"_s );
208
209 QVariantMap outputs;
210 outputs.insert( u"OUTPUT"_s, dest );
211 outputs.insert( u"NUM_CLUSTERS"_s, static_cast<unsigned int>( clusterSize.size() ) );
212 return outputs;
213}
214
215void QgsDbscanClusteringAlgorithm::stdbscan(
216 const std::size_t minSize,
217 const double eps1,
218 const double eps2,
219 const bool borderPointsAreNoise,
220 const long featureCount,
221 QgsFeatureIterator features,
223 std::unordered_map<QgsFeatureId, int> &idToCluster,
224 std::unordered_map<QgsFeatureId, QDateTime> &idToDateTime,
225 QgsProcessingFeedback *feedback
226)
227{
228 const double step = featureCount > 0 ? 90.0 / featureCount : 1;
229
230 std::unordered_set<QgsFeatureId> visited;
231 visited.reserve( index.size() );
232
233 QgsFeature feat;
234 int i = 0;
235 int clusterCount = 0;
236
237 while ( features.nextFeature( feat ) )
238 {
239 if ( feedback->isCanceled() )
240 {
241 break;
242 }
243
244 if ( !feat.hasGeometry() )
245 {
246 feedback->setProgress( ++i * step );
247 continue;
248 }
249
250 if ( visited.find( feat.id() ) != visited.end() )
251 {
252 // already visited!
253 continue;
254 }
255
256 QgsPointXY point;
259 else
260 {
261 // not a point geometry
262 feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
263 feedback->setProgress( ++i * step );
264 continue;
265 }
266
267 if ( !idToDateTime.empty() && !idToDateTime[feat.id()].isValid() )
268 {
269 // missing datetime value
270 feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
271 feedback->setProgress( ++i * step );
272 continue;
273 }
274
275 std::unordered_set<QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
276
277 if ( minSize > 1 )
278 {
279 index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData &data ) {
280 if ( idToDateTime.empty() || ( idToDateTime[data.id].isValid() && std::abs( idToDateTime[pointId].msecsTo( idToDateTime[data.id] ) ) <= eps2 ) )
281 within.insert( data );
282 } );
283 if ( within.size() < minSize )
284 continue;
285
286 visited.insert( feat.id() );
287 }
288 else
289 {
290 // optimised case for minSize == 1, we can skip the initial check
291 within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
292 }
293
294 // start new cluster
295 clusterCount++;
296 idToCluster[feat.id()] = clusterCount;
297 feedback->setProgress( ++i * step );
298
299 while ( !within.empty() )
300 {
301 if ( feedback->isCanceled() )
302 {
303 break;
304 }
305
306 const QgsSpatialIndexKDBushData j = *within.begin();
307 within.erase( within.begin() );
308
309 if ( visited.find( j.id ) != visited.end() )
310 {
311 // already visited!
312 continue;
313 }
314
315 visited.insert( j.id );
316 feedback->setProgress( ++i * step );
317
318 // check from this point
319 const QgsPointXY point2 = j.point();
320
321 std::unordered_set<QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within2;
322 index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData &data ) {
323 if ( idToDateTime.empty() || ( idToDateTime[data.id].isValid() && std::abs( idToDateTime[point2Id].msecsTo( idToDateTime[data.id] ) ) <= eps2 ) )
324 within2.insert( data );
325 } );
326
327 if ( within2.size() >= minSize )
328 {
329 // expand neighbourhood
330 std::copy_if( within2.begin(), within2.end(), std::inserter( within, within.end() ), [&visited]( const QgsSpatialIndexKDBushData &needle ) {
331 return visited.find( needle.id ) == visited.end();
332 } );
333 }
334 if ( !borderPointsAreNoise || within2.size() >= minSize )
335 {
336 idToCluster[j.id] = clusterCount;
337 }
338 }
339 }
340}
341
@ VectorPoint
Vector point layers.
Definition qgis.h:3715
@ Point
Point.
Definition qgis.h:296
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
Definition qgis.h:3947
A vector of attributes.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
QgsFeatureRequest & setNoAttributes()
Set that no attributes will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
QgsAttributes attributes
Definition qgsfeature.h:64
QgsFeatureId id
Definition qgsfeature.h:63
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
QgsGeometry geometry
Definition qgsfeature.h:66
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:56
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:65
Encapsulate a field in an attribute table or data source.
Definition qgsfield.h:56
Container of fields for a vector layer.
Definition qgsfields.h:46
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
Definition qgsfields.cpp:75
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.).
Represents a 2D point.
Definition qgspointxy.h:62
double y
Definition qgspointxy.h:66
double x
Definition qgspointxy.h:65
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
void featureAddedToSink(const QString &output)
Reports that a feature was added to the the sink associated with the specified algorithm output.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
void featureSinkFinalized(const QString &output)
Reports that a feature sink has been finalized.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A double numeric parameter for distance values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A container for data stored inside a QgsSpatialIndexKDBush index.
QgsPointXY point() const
Returns the indexed point.
A very fast static spatial index for 2D points based on a flat KD-tree.
qgssize size() const
Returns the size of the index, i.e.
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
static Q_INVOKABLE QString displayString(Qgis::WkbType type)
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
T qgsgeometry_cast(QgsAbstractGeometry *geom)
QList< int > QgsAttributeList
Definition qgsfield.h:30