QGIS API Documentation 3.41.0-Master (3440c17df1d)
Loading...
Searching...
No Matches
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdbscanclustering.cpp
3 ---------------------
4 begin : July 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
20#include <unordered_set>
21
23
24QString QgsDbscanClusteringAlgorithm::name() const
25{
26 return QStringLiteral( "dbscanclustering" );
27}
28
29QString QgsDbscanClusteringAlgorithm::displayName() const
30{
31 return QObject::tr( "DBSCAN clustering" );
32}
33
34QString QgsDbscanClusteringAlgorithm::shortDescription() const
35{
36 return QObject::tr( "Clusters point features using a density based scan algorithm." );
37}
38
39QStringList QgsDbscanClusteringAlgorithm::tags() const
40{
41 return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' );
42}
43
44QString QgsDbscanClusteringAlgorithm::group() const
45{
46 return QObject::tr( "Vector analysis" );
47}
48
49QString QgsDbscanClusteringAlgorithm::groupId() const
50{
51 return QStringLiteral( "vectoranalysis" );
52}
53
54void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55{
56 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57 QObject::tr( "Input layer" ), QList< int >() << static_cast< int >( Qgis::ProcessingSourceType::VectorPoint ) ) );
58 addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60 addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61 QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62
63 auto dbscanStarParam = std::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64 QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65 dbscanStarParam->setFlags( dbscanStarParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
66 addParameter( dbscanStarParam.release() );
67
68 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69 QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70 fieldNameParam->setFlags( fieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
71 addParameter( fieldNameParam.release() );
72 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ),
73 QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
74 sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
75 addParameter( sizeFieldNameParam.release() );
76
77 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), Qgis::ProcessingSourceType::VectorPoint ) );
78
79 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
80}
81
82QString QgsDbscanClusteringAlgorithm::shortHelpString() const
83{
84 return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
85 "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
86}
87
88QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
89{
90 return new QgsDbscanClusteringAlgorithm();
91}
92
93struct KDBushDataEqualById
94{
95 bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
96 {
97 return a.id == b.id;
98 }
99};
100
101struct KDBushDataHashById
102{
103 std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
104 {
105 return std::hash< QgsFeatureId > {}( a.id );
106 }
107};
108
109QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
110{
111 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
112 if ( !source )
113 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
114
115 const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
116 const double eps1 = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
117 const double eps2 = parameterAsDouble( parameters, QStringLiteral( "EPS2" ), context );
118 const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
119
120 QgsFields outputFields = source->fields();
121 QgsFields newFields;
122 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
123 newFields.append( QgsField( clusterFieldName, QMetaType::Type::Int ) );
124 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( "SIZE_FIELD_NAME" ), context );
125 newFields.append( QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
126 outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
127
128 QString dest;
129 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
130 if ( !sink )
131 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
132
133 QgsFeatureRequest indexRequest;
134
135 std::unordered_map< QgsFeatureId, QDateTime> idToDateTime;
136 const QString dateTimeFieldName = parameterAsString( parameters, QStringLiteral( "DATETIME_FIELD" ), context );
137 int dateTimefieldIndex = -1;
138 if ( !dateTimeFieldName.isEmpty() )
139 {
140 dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName );
141 if ( dateTimefieldIndex == -1 )
142 throw QgsProcessingException( QObject::tr( "Datetime field missing" ) );
143
144 indexRequest.setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex );
145 }
146 else
147 {
148 indexRequest.setNoAttributes();
149 }
150
151 // build spatial index, also collecting feature datetimes if required
152 feedback->pushInfo( QObject::tr( "Building spatial index" ) );
153 QgsFeatureIterator indexIterator = source->getFeatures( indexRequest );
154 QgsSpatialIndexKDBush index( indexIterator, [&idToDateTime, dateTimefieldIndex]( const QgsFeature & feature )->bool
155 {
156 if ( dateTimefieldIndex >= 0 )
157 idToDateTime[ feature.id() ] = feature.attributes().at( dateTimefieldIndex ).toDateTime();
158 return true;
159 }, feedback );
160
161 if ( feedback->isCanceled() )
162 return QVariantMap();
163
164 // stdbscan!
165 feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
166 std::unordered_map< QgsFeatureId, int> idToCluster;
167 idToCluster.reserve( index.size() );
168 const long featureCount = source->featureCount();
169 QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
170 stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback );
171
172 // cluster size
173 std::unordered_map< int, int> clusterSize;
174 std::for_each( idToCluster.begin(), idToCluster.end(), [ &clusterSize ]( std::pair< QgsFeatureId, int > idCluster ) { clusterSize[ idCluster.second ]++; } );
175
176 // write clusters
177 const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
178 features = source->getFeatures();
179 int i = 0;
180 QgsFeature feat;
181 while ( features.nextFeature( feat ) )
182 {
183 i++;
184 if ( feedback->isCanceled() )
185 {
186 break;
187 }
188
189 feedback->setProgress( 90 + i * writeStep );
190 QgsAttributes attr = feat.attributes();
191 const auto cluster = idToCluster.find( feat.id() );
192 if ( cluster != idToCluster.end() )
193 {
194 attr << cluster->second << clusterSize[ cluster->second ];
195 }
196 else
197 {
198 attr << QVariant() << QVariant();
199 }
200 feat.setAttributes( attr );
201 if ( !sink->addFeature( feat, QgsFeatureSink::FastInsert ) )
202 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
203 }
204
205 sink->finalize();
206
207 QVariantMap outputs;
208 outputs.insert( QStringLiteral( "OUTPUT" ), dest );
209 outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), static_cast< unsigned int >( clusterSize.size() ) );
210 return outputs;
211}
212
213void QgsDbscanClusteringAlgorithm::stdbscan( const std::size_t minSize,
214 const double eps1,
215 const double eps2,
216 const bool borderPointsAreNoise,
217 const long featureCount,
218 QgsFeatureIterator features,
220 std::unordered_map< QgsFeatureId, int> &idToCluster,
221 std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime,
222 QgsProcessingFeedback *feedback )
223{
224 const double step = featureCount > 0 ? 90.0 / featureCount : 1;
225
226 std::unordered_set< QgsFeatureId > visited;
227 visited.reserve( index.size() );
228
229 QgsFeature feat;
230 int i = 0;
231 int clusterCount = 0;
232
233 while ( features.nextFeature( feat ) )
234 {
235 if ( feedback->isCanceled() )
236 {
237 break;
238 }
239
240 if ( !feat.hasGeometry() )
241 {
242 feedback->setProgress( ++i * step );
243 continue;
244 }
245
246 if ( visited.find( feat.id() ) != visited.end() )
247 {
248 // already visited!
249 continue;
250 }
251
252 QgsPointXY point;
254 point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
255 else
256 {
257 // not a point geometry
258 feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
259 feedback->setProgress( ++i * step );
260 continue;
261 }
262
263 if ( !idToDateTime.empty() && !idToDateTime[ feat.id() ].isValid() )
264 {
265 // missing datetime value
266 feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
267 feedback->setProgress( ++i * step );
268 continue;
269 }
270
271 std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
272
273 if ( minSize > 1 )
274 {
275 index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
276 {
277 if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ pointId ].msecsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
278 within.insert( data );
279 } );
280 if ( within.size() < minSize )
281 continue;
282
283 visited.insert( feat.id() );
284 }
285 else
286 {
287 // optimised case for minSize == 1, we can skip the initial check
288 within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
289 }
290
291 // start new cluster
292 clusterCount++;
293 idToCluster[ feat.id() ] = clusterCount;
294 feedback->setProgress( ++i * step );
295
296 while ( !within.empty() )
297 {
298 if ( feedback->isCanceled() )
299 {
300 break;
301 }
302
303 const QgsSpatialIndexKDBushData j = *within.begin();
304 within.erase( within.begin() );
305
306 if ( visited.find( j.id ) != visited.end() )
307 {
308 // already visited!
309 continue;
310 }
311
312 visited.insert( j.id );
313 feedback->setProgress( ++i * step );
314
315 // check from this point
316 const QgsPointXY point2 = j.point();
317
318 std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
319 index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
320 {
321 if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ point2Id ].msecsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
322 within2.insert( data );
323 } );
324
325 if ( within2.size() >= minSize )
326 {
327 // expand neighbourhood
328 std::copy_if( within2.begin(),
329 within2.end(),
330 std::inserter( within, within.end() ),
331 [&visited]( const QgsSpatialIndexKDBushData & needle )
332 {
333 return visited.find( needle.id ) == visited.end();
334 } );
335 }
336 if ( !borderPointsAreNoise || within2.size() >= minSize )
337 {
338 idToCluster[ j.id ] = clusterCount;
339 }
340 }
341 }
342}
343
345
346
@ VectorPoint
Vector point layers.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
A vector of attributes.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
QgsFeatureRequest & setNoAttributes()
Set that no attributes will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
QgsAttributes attributes
Definition qgsfeature.h:67
QgsFeatureId id
Definition qgsfeature.h:66
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
QgsGeometry geometry
Definition qgsfeature.h:69
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
Encapsulate a field in an attribute table or data source.
Definition qgsfield.h:53
Container of fields for a vector layer.
Definition qgsfields.h:46
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
Definition qgsfields.cpp:70
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Definition qgspointxy.h:60
double y
Definition qgspointxy.h:64
double x
Definition qgspointxy.h:63
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A double numeric parameter for distance values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A container for data stored inside a QgsSpatialIndexKDBush index.
QgsPointXY point() const
Returns the indexed point.
A very fast static spatial index for 2D points based on a flat KD-tree.
qgssize size() const
Returns the size of the index, i.e.
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
static QString displayString(Qgis::WkbType type)
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
QList< int > QgsAttributeList
Definition qgsfield.h:27