QGIS API Documentation  3.14.0-Pi (9f7028fd23)
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdbscanclustering.cpp
3  ---------------------
4  begin : July 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsspatialindexkdbush.h"
20 #include <unordered_set>
21 
23 
24 QString QgsDbscanClusteringAlgorithm::name() const
25 {
26  return QStringLiteral( "dbscanclustering" );
27 }
28 
29 QString QgsDbscanClusteringAlgorithm::displayName() const
30 {
31  return QObject::tr( "DBSCAN clustering" );
32 }
33 
34 QString QgsDbscanClusteringAlgorithm::shortDescription() const
35 {
36  return QObject::tr( "Clusters point features using a density based scan algorithm." );
37 }
38 
39 QStringList QgsDbscanClusteringAlgorithm::tags() const
40 {
41  return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
42 }
43 
44 QString QgsDbscanClusteringAlgorithm::group() const
45 {
46  return QObject::tr( "Vector analysis" );
47 }
48 
49 QString QgsDbscanClusteringAlgorithm::groupId() const
50 {
51  return QStringLiteral( "vectoranalysis" );
52 }
53 
54 void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55 {
56  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57  QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61  QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62 
63  auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64  QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( dbscanStarParam.release() );
67 
68  auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69  QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71  addParameter( fieldNameParam.release() );
72 
73  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
74 
75  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
76 }
77 
78 QString QgsDbscanClusteringAlgorithm::shortHelpString() const
79 {
80  return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
81  "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
82 }
83 
84 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
85 {
86  return new QgsDbscanClusteringAlgorithm();
87 }
88 
89 struct KDBushDataEqualById
90 {
91  bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
92  {
93  return a.id == b.id;
94  }
95 };
96 
97 struct KDBushDataHashById
98 {
99  std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
100  {
101  return std::hash< QgsFeatureId > {}( a.id );
102  }
103 };
104 
105 QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
106 {
107  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
108  if ( !source )
109  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
110 
111  const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
112  const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
113  const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
114 
115  QgsFields outputFields = source->fields();
116  const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
117  QgsFields newFields;
118  newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
119  outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
120 
121  QString dest;
122  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
123  if ( !sink )
124  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
125 
126  // build spatial index
127  feedback->pushInfo( QObject::tr( "Building spatial index" ) );
128  QgsSpatialIndexKDBush index( *source, feedback );
129  if ( feedback->isCanceled() )
130  return QVariantMap();
131 
132  // dbscan!
133  feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
134  std::unordered_map< QgsFeatureId, int> idToCluster;
135  idToCluster.reserve( index.size() );
136  QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
137  const long featureCount = source->featureCount();
138 
139  int clusterCount = 0;
140  dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, clusterCount, feedback );
141 
142  // write clusters
143  const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
144  features = source->getFeatures();
145  int i = 0;
146  QgsFeature feat;
147  while ( features.nextFeature( feat ) )
148  {
149  i++;
150  if ( feedback->isCanceled() )
151  {
152  break;
153  }
154 
155  feedback->setProgress( 90 + i * writeStep );
156  QgsAttributes attr = feat.attributes();
157  auto cluster = idToCluster.find( feat.id() );
158  if ( cluster != idToCluster.end() )
159  {
160  attr << cluster->second;
161  }
162  else
163  {
164  attr << QVariant();
165  }
166  feat.setAttributes( attr );
167  sink->addFeature( feat, QgsFeatureSink::FastInsert );
168  }
169 
170  QVariantMap outputs;
171  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
172  outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), clusterCount );
173  return outputs;
174 }
175 
176 
177 void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
178  const double eps,
179  const bool borderPointsAreNoise,
180  const long featureCount,
181  QgsFeatureIterator features,
182  QgsSpatialIndexKDBush &index,
183  std::unordered_map< QgsFeatureId, int> &idToCluster,
184  int &clusterCount,
185  QgsProcessingFeedback *feedback )
186 {
187  const double step = featureCount > 0 ? 90.0 / featureCount : 1;
188 
189  std::unordered_set< QgsFeatureId > visited;
190  visited.reserve( index.size() );
191 
192  QgsFeature feat;
193  int i = 0;
194  clusterCount = 0;
195 
196  while ( features.nextFeature( feat ) )
197  {
198  if ( feedback->isCanceled() )
199  {
200  break;
201  }
202 
203  if ( !feat.hasGeometry() )
204  {
205  feedback->setProgress( ++i * step );
206  continue;
207  }
208 
209  if ( visited.find( feat.id() ) != visited.end() )
210  {
211  // already visited!
212  continue;
213  }
214 
215  QgsPointXY point;
217  point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
218  else
219  {
220  // not a point geometry
221  feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
222  feedback->setProgress( ++i * step );
223  continue;
224  }
225 
226  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
227 
228  if ( minSize > 1 )
229  {
230  index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
231  {
232  within.insert( data );
233  } );
234  if ( within.size() < minSize )
235  continue;
236 
237  visited.insert( feat.id() );
238  }
239  else
240  {
241  // optimised case for minSize == 1, we can skip the initial check
242  within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
243  }
244 
245  // start new cluster
246  clusterCount++;
247  idToCluster[ feat.id() ] = clusterCount;
248  feedback->setProgress( ++i * step );
249 
250  while ( !within.empty() )
251  {
252  if ( feedback->isCanceled() )
253  {
254  break;
255  }
256 
257  QgsSpatialIndexKDBushData j = *within.begin();
258  within.erase( within.begin() );
259 
260  if ( visited.find( j.id ) != visited.end() )
261  {
262  // already visited!
263  continue;
264  }
265 
266  visited.insert( j.id );
267  feedback->setProgress( ++i * step );
268 
269  // check from this point
270  QgsPointXY point2 = j.point();
271 
272  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
273  index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
274  {
275  within2.insert( data );
276  } );
277  if ( within2.size() >= minSize )
278  {
279  // expand neighbourhood
280  std::copy_if( within2.begin(),
281  within2.end(),
282  std::inserter( within, within.end() ),
283  [&visited]( const QgsSpatialIndexKDBushData & needle )
284  {
285  return visited.find( needle.id ) == visited.end();
286  } );
287  }
288  if ( !borderPointsAreNoise || within2.size() >= minSize )
289  {
290  idToCluster[ j.id ] = clusterCount;
291  }
292  }
293  }
294 }
295 
297 
298 
QgsFeedback::setProgress
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:75
QgsPointXY::y
double y
Definition: qgspointxy.h:48
QgsWkbTypes::Point
@ Point
Definition: qgswkbtypes.h:71
QgsProcessingParameterNumber
Definition: qgsprocessingparameters.h:1838
QgsProcessingFeedback
Definition: qgsprocessingfeedback.h:37
QgsProcessingFeedback::pushInfo
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
Definition: qgsprocessingfeedback.cpp:48
QgsProcessingFeedback::reportError
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
Definition: qgsprocessingfeedback.cpp:39
QgsFields
Definition: qgsfields.h:44
QgsFeature::geometry
QgsGeometry geometry
Definition: qgsfeature.h:71
QgsGeometry::wkbType
QgsWkbTypes::Type wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
Definition: qgsgeometry.cpp:345
QgsProcessingParameterDefinition::FlagAdvanced
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
Definition: qgsprocessingparameters.h:419
QgsProcessingParameterFeatureSource
Definition: qgsprocessingparameters.h:2612
QgsSpatialIndexKDBushData::id
QgsFeatureId id
Feature ID.
Definition: qgsspatialindexkdbushdata.h:60
QgsFields::append
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
Definition: qgsfields.cpp:59
QgsProcessing::TypeVectorPoint
@ TypeVectorPoint
Vector point layers.
Definition: qgsprocessing.h:48
QgsProcessingUtils::combineFields
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
Definition: qgsprocessingutils.cpp:1113
QgsProcessingOutputNumber
Definition: qgsprocessingoutputs.h:294
QgsProcessingParameterFeatureSink
Definition: qgsprocessingparameters.h:2773
QgsFeature::id
QgsFeatureId id
Definition: qgsfeature.h:68
QgsFeatureRequest
Definition: qgsfeaturerequest.h:75
qgsspatialindexkdbush.h
QgsSpatialIndexKDBush::size
qgssize size() const
Returns the size of the index, i.e.
Definition: qgsspatialindexkdbush.cpp:73
qgsalgorithmdbscanclustering.h
QgsProcessingContext
Definition: qgsprocessingcontext.h:43
QgsGeometry::constGet
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
Definition: qgsgeometry.cpp:128
QgsFeature::attributes
QgsAttributes attributes
Definition: qgsfeature.h:69
QgsProcessingParameterNumber::Integer
@ Integer
Integer values.
Definition: qgsprocessingparameters.h:1845
QgsProcessingParameterDistance
Definition: qgsprocessingparameters.h:1941
QgsPointXY
Definition: qgspointxy.h:43
QgsFeedback::isCanceled
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:66
QgsFeatureIterator::nextFeature
bool nextFeature(QgsFeature &f)
Definition: qgsfeatureiterator.h:373
QgsFeature::hasGeometry
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:197
QgsSpatialIndexKDBushData::point
QgsPointXY point() const
Returns the indexed point.
Definition: qgsspatialindexkdbushdata.h:54
QgsWkbTypes::displayString
static QString displayString(Type type)
Returns a display string type for a WKB type, e.g., the geometry name used in WKT geometry representa...
Definition: qgswkbtypes.cpp:145
QgsPointXY::x
double x
Definition: qgspointxy.h:47
QgsAttributes
Definition: qgsattributes.h:57
QgsFeature
Definition: qgsfeature.h:55
QgsFeature::setAttributes
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Definition: qgsfeature.cpp:127
QgsFeatureIterator
Definition: qgsfeatureiterator.h:263
QgsSpatialIndexKDBush
Definition: qgsspatialindexkdbush.h:53
QgsProcessingException
Definition: qgsexception.h:82
QgsWkbTypes::flatType
static Type flatType(Type type)
Returns the flat type for a WKB type.
Definition: qgswkbtypes.h:701
QgsFeatureSink::FastInsert
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Definition: qgsfeaturesink.h:70
QgsSpatialIndexKDBushData
Definition: qgsspatialindexkdbushdata.h:32
QgsSpatialIndexKDBush::within
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
Definition: qgsspatialindexkdbush.cpp:61
QgsField
Definition: qgsfield.h:49