QGIS API Documentation  3.16.0-Hannover (43b64b13f3)
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdbscanclustering.cpp
3  ---------------------
4  begin : July 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsspatialindexkdbush.h"
20 #include <unordered_set>
21 
23 
24 QString QgsDbscanClusteringAlgorithm::name() const
25 {
26  return QStringLiteral( "dbscanclustering" );
27 }
28 
29 QString QgsDbscanClusteringAlgorithm::displayName() const
30 {
31  return QObject::tr( "DBSCAN clustering" );
32 }
33 
34 QString QgsDbscanClusteringAlgorithm::shortDescription() const
35 {
36  return QObject::tr( "Clusters point features using a density based scan algorithm." );
37 }
38 
39 QStringList QgsDbscanClusteringAlgorithm::tags() const
40 {
41  return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
42 }
43 
44 QString QgsDbscanClusteringAlgorithm::group() const
45 {
46  return QObject::tr( "Vector analysis" );
47 }
48 
49 QString QgsDbscanClusteringAlgorithm::groupId() const
50 {
51  return QStringLiteral( "vectoranalysis" );
52 }
53 
54 void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55 {
56  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57  QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61  QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62 
63  auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64  QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( dbscanStarParam.release() );
67 
68  auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69  QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71  addParameter( fieldNameParam.release() );
72  auto sizeFieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ),
73  QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
74  sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
75  addParameter( sizeFieldNameParam.release() );
76 
77  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
78 
79  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
80 }
81 
82 QString QgsDbscanClusteringAlgorithm::shortHelpString() const
83 {
84  return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
85  "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
86 }
87 
88 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
89 {
90  return new QgsDbscanClusteringAlgorithm();
91 }
92 
93 struct KDBushDataEqualById
94 {
95  bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
96  {
97  return a.id == b.id;
98  }
99 };
100 
101 struct KDBushDataHashById
102 {
103  std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
104  {
105  return std::hash< QgsFeatureId > {}( a.id );
106  }
107 };
108 
109 QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
110 {
111  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
112  if ( !source )
113  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
114 
115  const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
116  const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
117  const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
118 
119  QgsFields outputFields = source->fields();
120  QgsFields newFields;
121  const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
122  newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
123  const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( "SIZE_FIELD_NAME" ), context );
124  newFields.append( QgsField( clusterSizeFieldName, QVariant::Int ) );
125  outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
126 
127  QString dest;
128  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
129  if ( !sink )
130  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
131 
132  // build spatial index
133  feedback->pushInfo( QObject::tr( "Building spatial index" ) );
134  QgsSpatialIndexKDBush index( *source, feedback );
135  if ( feedback->isCanceled() )
136  return QVariantMap();
137 
138  // dbscan!
139  feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
140  std::unordered_map< QgsFeatureId, int> idToCluster;
141  idToCluster.reserve( index.size() );
142  QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
143  const long featureCount = source->featureCount();
144  dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback );
145 
146  // cluster size
147  std::unordered_map< int, int> clusterSize;
148  std::for_each( idToCluster.begin(), idToCluster.end(), [ &clusterSize ]( std::pair< QgsFeatureId, int > idCluster ) { clusterSize[ idCluster.second ]++; } );
149 
150  // write clusters
151  const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
152  features = source->getFeatures();
153  int i = 0;
154  QgsFeature feat;
155  while ( features.nextFeature( feat ) )
156  {
157  i++;
158  if ( feedback->isCanceled() )
159  {
160  break;
161  }
162 
163  feedback->setProgress( 90 + i * writeStep );
164  QgsAttributes attr = feat.attributes();
165  auto cluster = idToCluster.find( feat.id() );
166  if ( cluster != idToCluster.end() )
167  {
168  attr << cluster->second << clusterSize[ cluster->second ];
169  }
170  else
171  {
172  attr << QVariant() << QVariant();
173  }
174  feat.setAttributes( attr );
175  sink->addFeature( feat, QgsFeatureSink::FastInsert );
176  }
177 
178  QVariantMap outputs;
179  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
180  outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), static_cast< unsigned int >( clusterSize.size() ) );
181  return outputs;
182 }
183 
184 
185 void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
186  const double eps,
187  const bool borderPointsAreNoise,
188  const long featureCount,
189  QgsFeatureIterator features,
190  QgsSpatialIndexKDBush &index,
191  std::unordered_map< QgsFeatureId, int> &idToCluster,
192  QgsProcessingFeedback *feedback )
193 {
194  const double step = featureCount > 0 ? 90.0 / featureCount : 1;
195 
196  std::unordered_set< QgsFeatureId > visited;
197  visited.reserve( index.size() );
198 
199  QgsFeature feat;
200  int i = 0;
201  int clusterCount = 0;
202 
203  while ( features.nextFeature( feat ) )
204  {
205  if ( feedback->isCanceled() )
206  {
207  break;
208  }
209 
210  if ( !feat.hasGeometry() )
211  {
212  feedback->setProgress( ++i * step );
213  continue;
214  }
215 
216  if ( visited.find( feat.id() ) != visited.end() )
217  {
218  // already visited!
219  continue;
220  }
221 
222  QgsPointXY point;
224  point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
225  else
226  {
227  // not a point geometry
228  feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
229  feedback->setProgress( ++i * step );
230  continue;
231  }
232 
233  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
234 
235  if ( minSize > 1 )
236  {
237  index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
238  {
239  within.insert( data );
240  } );
241  if ( within.size() < minSize )
242  continue;
243 
244  visited.insert( feat.id() );
245  }
246  else
247  {
248  // optimised case for minSize == 1, we can skip the initial check
249  within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
250  }
251 
252  // start new cluster
253  clusterCount++;
254  idToCluster[ feat.id() ] = clusterCount;
255  feedback->setProgress( ++i * step );
256 
257  while ( !within.empty() )
258  {
259  if ( feedback->isCanceled() )
260  {
261  break;
262  }
263 
264  QgsSpatialIndexKDBushData j = *within.begin();
265  within.erase( within.begin() );
266 
267  if ( visited.find( j.id ) != visited.end() )
268  {
269  // already visited!
270  continue;
271  }
272 
273  visited.insert( j.id );
274  feedback->setProgress( ++i * step );
275 
276  // check from this point
277  QgsPointXY point2 = j.point();
278 
279  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
280  index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
281  {
282  within2.insert( data );
283  } );
284  if ( within2.size() >= minSize )
285  {
286  // expand neighbourhood
287  std::copy_if( within2.begin(),
288  within2.end(),
289  std::inserter( within, within.end() ),
290  [&visited]( const QgsSpatialIndexKDBushData & needle )
291  {
292  return visited.find( needle.id ) == visited.end();
293  } );
294  }
295  if ( !borderPointsAreNoise || within2.size() >= minSize )
296  {
297  idToCluster[ j.id ] = clusterCount;
298  }
299  }
300  }
301 }
302 
304 
305 
QgsFeedback::setProgress
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:62
QgsFeature::id
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
QgsPointXY::y
double y
Definition: qgspointxy.h:48
QgsWkbTypes::displayString
static QString displayString(Type type) SIP_HOLDGIL
Returns a display string type for a WKB type, e.g., the geometry name used in WKT geometry representa...
Definition: qgswkbtypes.cpp:145
QgsWkbTypes::Point
@ Point
Definition: qgswkbtypes.h:72
QgsProcessingParameterNumber
A numeric parameter for processing algorithms.
Definition: qgsprocessingparameters.h:1960
QgsProcessingFeedback
Base class for providing feedback from a processing algorithm.
Definition: qgsprocessingfeedback.h:38
QgsPointXY::x
Q_GADGET double x
Definition: qgspointxy.h:47
QgsProcessingFeedback::pushInfo
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
Definition: qgsprocessingfeedback.cpp:48
QgsWkbTypes::flatType
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
Definition: qgswkbtypes.h:702
QgsProcessingFeedback::reportError
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
Definition: qgsprocessingfeedback.cpp:39
QgsFields
Container of fields for a vector layer.
Definition: qgsfields.h:45
QgsFeature::geometry
QgsGeometry geometry
Definition: qgsfeature.h:67
QgsProcessingParameterDefinition::FlagAdvanced
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
Definition: qgsprocessingparameters.h:423
QgsProcessingParameterFeatureSource
An input feature source (such as vector layers) parameter for processing algorithms.
Definition: qgsprocessingparameters.h:2734
QgsSpatialIndexKDBushData::id
QgsFeatureId id
Feature ID.
Definition: qgsspatialindexkdbushdata.h:60
QgsFields::append
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
Definition: qgsfields.cpp:59
QgsProcessing::TypeVectorPoint
@ TypeVectorPoint
Vector point layers.
Definition: qgsprocessing.h:48
QgsProcessingUtils::combineFields
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
Definition: qgsprocessingutils.cpp:1127
QgsProcessingOutputNumber
A numeric output for processing algorithms.
Definition: qgsprocessingoutputs.h:295
QgsProcessingParameterFeatureSink
A feature sink output for processing algorithms.
Definition: qgsprocessingparameters.h:2895
QgsFeatureRequest
This class wraps a request for features to a vector layer (or directly its vector data provider).
Definition: qgsfeaturerequest.h:76
qgsspatialindexkdbush.h
QgsSpatialIndexKDBush::size
qgssize size() const
Returns the size of the index, i.e.
Definition: qgsspatialindexkdbush.cpp:73
qgsalgorithmdbscanclustering.h
QgsProcessingContext
Contains information about the context in which a processing algorithm is executed.
Definition: qgsprocessingcontext.h:44
QgsGeometry::constGet
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
Definition: qgsgeometry.cpp:128
QgsFeature::attributes
QgsAttributes attributes
Definition: qgsfeature.h:65
QgsProcessingParameterNumber::Integer
@ Integer
Integer values.
Definition: qgsprocessingparameters.h:1966
QgsProcessingParameterDistance
A double numeric parameter for distance values.
Definition: qgsprocessingparameters.h:2063
QgsPointXY
A class to represent a 2D point.
Definition: qgspointxy.h:44
QgsFeedback::isCanceled
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:53
QgsFeatureIterator::nextFeature
bool nextFeature(QgsFeature &f)
Definition: qgsfeatureiterator.h:374
QgsFeature::hasGeometry
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:199
QgsSpatialIndexKDBushData::point
QgsPointXY point() const
Returns the indexed point.
Definition: qgsspatialindexkdbushdata.h:54
QgsAttributes
A vector of attributes.
Definition: qgsattributes.h:58
QgsFeature
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
Definition: qgsfeature.h:56
QgsFeature::setAttributes
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Definition: qgsfeature.cpp:129
QgsFeatureIterator
Wrapper for iterator of features from vector data provider or vector layer.
Definition: qgsfeatureiterator.h:265
QgsSpatialIndexKDBush
A very fast static spatial index for 2D points based on a flat KD-tree.
Definition: qgsspatialindexkdbush.h:54
QgsProcessingException
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
QgsGeometry::wkbType
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
Definition: qgsgeometry.cpp:345
QgsFeatureSink::FastInsert
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Definition: qgsfeaturesink.h:70
QgsSpatialIndexKDBushData
A container for data stored inside a QgsSpatialIndexKDBush index.
Definition: qgsspatialindexkdbushdata.h:33
QgsSpatialIndexKDBush::within
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
Definition: qgsspatialindexkdbush.cpp:61
QgsField
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:50