QGIS API Documentation  3.20.0-Odense (decaadbb31)
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdbscanclustering.cpp
3  ---------------------
4  begin : July 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsspatialindexkdbush.h"
20 #include <unordered_set>
21 
23 
24 QString QgsDbscanClusteringAlgorithm::name() const
25 {
26  return QStringLiteral( "dbscanclustering" );
27 }
28 
29 QString QgsDbscanClusteringAlgorithm::displayName() const
30 {
31  return QObject::tr( "DBSCAN clustering" );
32 }
33 
34 QString QgsDbscanClusteringAlgorithm::shortDescription() const
35 {
36  return QObject::tr( "Clusters point features using a density based scan algorithm." );
37 }
38 
39 QStringList QgsDbscanClusteringAlgorithm::tags() const
40 {
41  return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
42 }
43 
44 QString QgsDbscanClusteringAlgorithm::group() const
45 {
46  return QObject::tr( "Vector analysis" );
47 }
48 
49 QString QgsDbscanClusteringAlgorithm::groupId() const
50 {
51  return QStringLiteral( "vectoranalysis" );
52 }
53 
54 void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55 {
56  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57  QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61  QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62 
63  auto dbscanStarParam = std::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64  QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( dbscanStarParam.release() );
67 
68  auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69  QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71  addParameter( fieldNameParam.release() );
72  auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ),
73  QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
74  sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
75  addParameter( sizeFieldNameParam.release() );
76 
77  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
78 
79  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
80 }
81 
82 QString QgsDbscanClusteringAlgorithm::shortHelpString() const
83 {
84  return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
85  "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
86 }
87 
88 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
89 {
90  return new QgsDbscanClusteringAlgorithm();
91 }
92 
93 struct KDBushDataEqualById
94 {
95  bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
96  {
97  return a.id == b.id;
98  }
99 };
100 
101 struct KDBushDataHashById
102 {
103  std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
104  {
105  return std::hash< QgsFeatureId > {}( a.id );
106  }
107 };
108 
109 QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
110 {
111  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
112  if ( !source )
113  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
114 
115  const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
116  const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
117  const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
118 
119  QgsFields outputFields = source->fields();
120  QgsFields newFields;
121  const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
122  newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
123  const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( "SIZE_FIELD_NAME" ), context );
124  newFields.append( QgsField( clusterSizeFieldName, QVariant::Int ) );
125  outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
126 
127  QString dest;
128  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
129  if ( !sink )
130  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
131 
132  // build spatial index
133  feedback->pushInfo( QObject::tr( "Building spatial index" ) );
134  QgsSpatialIndexKDBush index( *source, feedback );
135  if ( feedback->isCanceled() )
136  return QVariantMap();
137 
138  // dbscan!
139  feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
140  std::unordered_map< QgsFeatureId, int> idToCluster;
141  idToCluster.reserve( index.size() );
142  QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
143  const long featureCount = source->featureCount();
144  dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, feedback );
145 
146  // cluster size
147  std::unordered_map< int, int> clusterSize;
148  std::for_each( idToCluster.begin(), idToCluster.end(), [ &clusterSize ]( std::pair< QgsFeatureId, int > idCluster ) { clusterSize[ idCluster.second ]++; } );
149 
150  // write clusters
151  const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
152  features = source->getFeatures();
153  int i = 0;
154  QgsFeature feat;
155  while ( features.nextFeature( feat ) )
156  {
157  i++;
158  if ( feedback->isCanceled() )
159  {
160  break;
161  }
162 
163  feedback->setProgress( 90 + i * writeStep );
164  QgsAttributes attr = feat.attributes();
165  auto cluster = idToCluster.find( feat.id() );
166  if ( cluster != idToCluster.end() )
167  {
168  attr << cluster->second << clusterSize[ cluster->second ];
169  }
170  else
171  {
172  attr << QVariant() << QVariant();
173  }
174  feat.setAttributes( attr );
175  sink->addFeature( feat, QgsFeatureSink::FastInsert );
176  }
177 
178  QVariantMap outputs;
179  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
180  outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), static_cast< unsigned int >( clusterSize.size() ) );
181  return outputs;
182 }
183 
184 
185 void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
186  const double eps,
187  const bool borderPointsAreNoise,
188  const long featureCount,
189  QgsFeatureIterator features,
190  QgsSpatialIndexKDBush &index,
191  std::unordered_map< QgsFeatureId, int> &idToCluster,
192  QgsProcessingFeedback *feedback )
193 {
194  const double step = featureCount > 0 ? 90.0 / featureCount : 1;
195 
196  std::unordered_set< QgsFeatureId > visited;
197  visited.reserve( index.size() );
198 
199  QgsFeature feat;
200  int i = 0;
201  int clusterCount = 0;
202 
203  while ( features.nextFeature( feat ) )
204  {
205  if ( feedback->isCanceled() )
206  {
207  break;
208  }
209 
210  if ( !feat.hasGeometry() )
211  {
212  feedback->setProgress( ++i * step );
213  continue;
214  }
215 
216  if ( visited.find( feat.id() ) != visited.end() )
217  {
218  // already visited!
219  continue;
220  }
221 
222  QgsPointXY point;
224  point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
225  else
226  {
227  // not a point geometry
228  feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
229  feedback->setProgress( ++i * step );
230  continue;
231  }
232 
233  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
234 
235  if ( minSize > 1 )
236  {
237  index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
238  {
239  within.insert( data );
240  } );
241  if ( within.size() < minSize )
242  continue;
243 
244  visited.insert( feat.id() );
245  }
246  else
247  {
248  // optimised case for minSize == 1, we can skip the initial check
249  within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
250  }
251 
252  // start new cluster
253  clusterCount++;
254  idToCluster[ feat.id() ] = clusterCount;
255  feedback->setProgress( ++i * step );
256 
257  while ( !within.empty() )
258  {
259  if ( feedback->isCanceled() )
260  {
261  break;
262  }
263 
264  QgsSpatialIndexKDBushData j = *within.begin();
265  within.erase( within.begin() );
266 
267  if ( visited.find( j.id ) != visited.end() )
268  {
269  // already visited!
270  continue;
271  }
272 
273  visited.insert( j.id );
274  feedback->setProgress( ++i * step );
275 
276  // check from this point
277  QgsPointXY point2 = j.point();
278 
279  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
280  index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
281  {
282  within2.insert( data );
283  } );
284  if ( within2.size() >= minSize )
285  {
286  // expand neighbourhood
287  std::copy_if( within2.begin(),
288  within2.end(),
289  std::inserter( within, within.end() ),
290  [&visited]( const QgsSpatialIndexKDBushData & needle )
291  {
292  return visited.find( needle.id ) == visited.end();
293  } );
294  }
295  if ( !borderPointsAreNoise || within2.size() >= minSize )
296  {
297  idToCluster[ j.id ] = clusterCount;
298  }
299  }
300  }
301 }
302 
304 
305 
A vector of attributes.
Definition: qgsattributes.h:58
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QgsAttributes attributes
Definition: qgsfeature.h:65
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Definition: qgsfeature.cpp:135
QgsGeometry geometry
Definition: qgsfeature.h:67
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:205
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:51
Container of fields for a vector layer.
Definition: qgsfields.h:45
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
Definition: qgsfields.cpp:59
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Definition: qgspointxy.h:59
double y
Definition: qgspointxy.h:63
Q_GADGET double x
Definition: qgspointxy.h:62
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A double numeric parameter for distance values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorPoint
Vector point layers.
Definition: qgsprocessing.h:49
A container for data stored inside a QgsSpatialIndexKDBush index.
QgsPointXY point() const
Returns the indexed point.
A very fast static spatial index for 2D points based on a flat KD-tree.
qgssize size() const
Returns the size of the index, i.e.
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
static QString displayString(Type type) SIP_HOLDGIL
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
Definition: qgswkbtypes.h:702