QGIS API Documentation  3.22.4-Białowieża (ce8e65e95e)
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdbscanclustering.cpp
3  ---------------------
4  begin : July 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsspatialindexkdbush.h"
20 #include <unordered_set>
21 
23 
24 QString QgsDbscanClusteringAlgorithm::name() const
25 {
26  return QStringLiteral( "dbscanclustering" );
27 }
28 
29 QString QgsDbscanClusteringAlgorithm::displayName() const
30 {
31  return QObject::tr( "DBSCAN clustering" );
32 }
33 
34 QString QgsDbscanClusteringAlgorithm::shortDescription() const
35 {
36  return QObject::tr( "Clusters point features using a density based scan algorithm." );
37 }
38 
39 QStringList QgsDbscanClusteringAlgorithm::tags() const
40 {
41  return QObject::tr( "clustering,clusters,density,based,points,distance" ).split( ',' );
42 }
43 
44 QString QgsDbscanClusteringAlgorithm::group() const
45 {
46  return QObject::tr( "Vector analysis" );
47 }
48 
49 QString QgsDbscanClusteringAlgorithm::groupId() const
50 {
51  return QStringLiteral( "vectoranalysis" );
52 }
53 
54 void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55 {
56  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57  QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61  QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62 
63  auto dbscanStarParam = std::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64  QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( dbscanStarParam.release() );
67 
68  auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69  QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71  addParameter( fieldNameParam.release() );
72  auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( "SIZE_FIELD_NAME" ),
73  QObject::tr( "Cluster size field name" ), QStringLiteral( "CLUSTER_SIZE" ) );
74  sizeFieldNameParam->setFlags( sizeFieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
75  addParameter( sizeFieldNameParam.release() );
76 
77  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
78 
79  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
80 }
81 
82 QString QgsDbscanClusteringAlgorithm::shortHelpString() const
83 {
84  return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
85  "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
86 }
87 
88 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
89 {
90  return new QgsDbscanClusteringAlgorithm();
91 }
92 
93 struct KDBushDataEqualById
94 {
95  bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
96  {
97  return a.id == b.id;
98  }
99 };
100 
101 struct KDBushDataHashById
102 {
103  std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
104  {
105  return std::hash< QgsFeatureId > {}( a.id );
106  }
107 };
108 
109 QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
110 {
111  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
112  if ( !source )
113  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
114 
115  const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
116  const double eps1 = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
117  const double eps2 = parameterAsDouble( parameters, QStringLiteral( "EPS2" ), context );
118  const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral( "DBSCAN*" ), context );
119 
120  QgsFields outputFields = source->fields();
121  QgsFields newFields;
122  const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
123  newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
124  const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( "SIZE_FIELD_NAME" ), context );
125  newFields.append( QgsField( clusterSizeFieldName, QVariant::Int ) );
126  outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
127 
128  QString dest;
129  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
130  if ( !sink )
131  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
132 
133  QgsFeatureRequest indexRequest;
134 
135  std::unordered_map< QgsFeatureId, QDateTime> idToDateTime;
136  const QString dateTimeFieldName = parameterAsString( parameters, QStringLiteral( "DATETIME_FIELD" ), context );
137  int dateTimefieldIndex = -1;
138  if ( !dateTimeFieldName.isEmpty() )
139  {
140  dateTimefieldIndex = source->fields().lookupField( dateTimeFieldName );
141  if ( dateTimefieldIndex == -1 )
142  throw QgsProcessingException( QObject::tr( "Datetime field missing" ) );
143 
144  indexRequest.setSubsetOfAttributes( QgsAttributeList() << dateTimefieldIndex );
145  }
146  else
147  {
148  indexRequest.setNoAttributes();
149  }
150 
151  // build spatial index, also collecting feature datetimes if required
152  feedback->pushInfo( QObject::tr( "Building spatial index" ) );
153  QgsFeatureIterator indexIterator = source->getFeatures( indexRequest );
154  QgsSpatialIndexKDBush index( indexIterator, [&idToDateTime, dateTimefieldIndex]( const QgsFeature & feature )->bool
155  {
156  if ( dateTimefieldIndex >= 0 )
157  idToDateTime[ feature.id() ] = feature.attributes().at( dateTimefieldIndex ).toDateTime();
158  return true;
159  }, feedback );
160 
161  if ( feedback->isCanceled() )
162  return QVariantMap();
163 
164  // stdbscan!
165  feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
166  std::unordered_map< QgsFeatureId, int> idToCluster;
167  idToCluster.reserve( index.size() );
168  const long featureCount = source->featureCount();
169  QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
170  stdbscan( minSize, eps1, eps2, borderPointsAreNoise, featureCount, features, index, idToCluster, idToDateTime, feedback );
171 
172  // cluster size
173  std::unordered_map< int, int> clusterSize;
174  std::for_each( idToCluster.begin(), idToCluster.end(), [ &clusterSize ]( std::pair< QgsFeatureId, int > idCluster ) { clusterSize[ idCluster.second ]++; } );
175 
176  // write clusters
177  const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
178  features = source->getFeatures();
179  int i = 0;
180  QgsFeature feat;
181  while ( features.nextFeature( feat ) )
182  {
183  i++;
184  if ( feedback->isCanceled() )
185  {
186  break;
187  }
188 
189  feedback->setProgress( 90 + i * writeStep );
190  QgsAttributes attr = feat.attributes();
191  const auto cluster = idToCluster.find( feat.id() );
192  if ( cluster != idToCluster.end() )
193  {
194  attr << cluster->second << clusterSize[ cluster->second ];
195  }
196  else
197  {
198  attr << QVariant() << QVariant();
199  }
200  feat.setAttributes( attr );
201  if ( !sink->addFeature( feat, QgsFeatureSink::FastInsert ) )
202  throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
203  }
204 
205  QVariantMap outputs;
206  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
207  outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), static_cast< unsigned int >( clusterSize.size() ) );
208  return outputs;
209 }
210 
211 void QgsDbscanClusteringAlgorithm::stdbscan( const std::size_t minSize,
212  const double eps1,
213  const double eps2,
214  const bool borderPointsAreNoise,
215  const long featureCount,
216  QgsFeatureIterator features,
217  QgsSpatialIndexKDBush &index,
218  std::unordered_map< QgsFeatureId, int> &idToCluster,
219  std::unordered_map< QgsFeatureId, QDateTime> &idToDateTime,
220  QgsProcessingFeedback *feedback )
221 {
222  const double step = featureCount > 0 ? 90.0 / featureCount : 1;
223 
224  std::unordered_set< QgsFeatureId > visited;
225  visited.reserve( index.size() );
226 
227  QgsFeature feat;
228  int i = 0;
229  int clusterCount = 0;
230 
231  while ( features.nextFeature( feat ) )
232  {
233  if ( feedback->isCanceled() )
234  {
235  break;
236  }
237 
238  if ( !feat.hasGeometry() )
239  {
240  feedback->setProgress( ++i * step );
241  continue;
242  }
243 
244  if ( visited.find( feat.id() ) != visited.end() )
245  {
246  // already visited!
247  continue;
248  }
249 
250  QgsPointXY point;
252  point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
253  else
254  {
255  // not a point geometry
256  feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
257  feedback->setProgress( ++i * step );
258  continue;
259  }
260 
261  if ( !idToDateTime.empty() && !idToDateTime[ feat.id() ].isValid() )
262  {
263  // missing datetime value
264  feedback->reportError( QObject::tr( "Feature %1 is missing a valid datetime value." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
265  feedback->setProgress( ++i * step );
266  continue;
267  }
268 
269  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
270 
271  if ( minSize > 1 )
272  {
273  index.within( point, eps1, [&within, pointId = feat.id(), &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
274  {
275  if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ pointId ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
276  within.insert( data );
277  } );
278  if ( within.size() < minSize )
279  continue;
280 
281  visited.insert( feat.id() );
282  }
283  else
284  {
285  // optimised case for minSize == 1, we can skip the initial check
286  within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
287  }
288 
289  // start new cluster
290  clusterCount++;
291  idToCluster[ feat.id() ] = clusterCount;
292  feedback->setProgress( ++i * step );
293 
294  while ( !within.empty() )
295  {
296  if ( feedback->isCanceled() )
297  {
298  break;
299  }
300 
301  const QgsSpatialIndexKDBushData j = *within.begin();
302  within.erase( within.begin() );
303 
304  if ( visited.find( j.id ) != visited.end() )
305  {
306  // already visited!
307  continue;
308  }
309 
310  visited.insert( j.id );
311  feedback->setProgress( ++i * step );
312 
313  // check from this point
314  const QgsPointXY point2 = j.point();
315 
316  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
317  index.within( point2, eps1, [&within2, point2Id = j.id, &idToDateTime, &eps2]( const QgsSpatialIndexKDBushData & data )
318  {
319  if ( idToDateTime.empty() || ( idToDateTime[ data.id ].isValid() && std::abs( idToDateTime[ point2Id ].secsTo( idToDateTime[ data.id ] ) ) <= eps2 ) )
320  within2.insert( data );
321  } );
322 
323  if ( within2.size() >= minSize )
324  {
325  // expand neighbourhood
326  std::copy_if( within2.begin(),
327  within2.end(),
328  std::inserter( within, within.end() ),
329  [&visited]( const QgsSpatialIndexKDBushData & needle )
330  {
331  return visited.find( needle.id ) == visited.end();
332  } );
333  }
334  if ( !borderPointsAreNoise || within2.size() >= minSize )
335  {
336  idToCluster[ j.id ] = clusterCount;
337  }
338  }
339  }
340 }
341 
343 
344 
A vector of attributes.
Definition: qgsattributes.h:58
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
QgsFeatureRequest & setNoAttributes()
Set that no attributes will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QgsAttributes attributes
Definition: qgsfeature.h:65
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Definition: qgsfeature.cpp:153
QgsGeometry geometry
Definition: qgsfeature.h:67
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:223
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:51
Container of fields for a vector layer.
Definition: qgsfields.h:45
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
Definition: qgsfields.cpp:59
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Definition: qgspointxy.h:59
double y
Definition: qgspointxy.h:63
Q_GADGET double x
Definition: qgspointxy.h:62
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A double numeric parameter for distance values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorPoint
Vector point layers.
Definition: qgsprocessing.h:49
A container for data stored inside a QgsSpatialIndexKDBush index.
QgsPointXY point() const
Returns the indexed point.
A very fast static spatial index for 2D points based on a flat KD-tree.
qgssize size() const
Returns the size of the index, i.e.
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
static QString displayString(Type type) SIP_HOLDGIL
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
Definition: qgswkbtypes.h:732
QList< int > QgsAttributeList
Definition: qgsfield.h:26