19#include <unordered_map>
23const int KMEANS_MAX_ITERATIONS = 1000;
25QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
55 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ), QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
57 addParameter( fieldNameParam.release() );
58 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ), QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
60 addParameter( sizeFieldNameParam.release() );
65QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
67 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
68 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
71QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
73 return new QgsKMeansClusteringAlgorithm();
78 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
82 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
84 QgsFields outputFields = source->fields();
86 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
87 newFields.
append(
QgsField( clusterFieldName, QMetaType::Type::Int ) );
88 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
89 newFields.
append(
QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
93 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
98 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
99 const double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
102 int featureWithGeometryCount = 0;
105 std::vector<Feature> clusterFeatures;
107 QHash<QgsFeatureId, int> idToObj;
119 featureWithGeometryCount++;
127 if ( centroid.isNull() )
130 point =
QgsPointXY( *qgsgeometry_cast<const QgsPoint *>( centroid.constGet() ) );
135 idToObj[feat.
id()] = clusterFeatures.size();
136 clusterFeatures.emplace_back( Feature( point ) );
141 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
147 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
150 std::vector<QgsPointXY> centers( k );
152 initClusters( clusterFeatures, centers, k, feedback );
153 calculateKMeans( clusterFeatures, centers, k, feedback );
157 std::unordered_map<int, int> clusterSize;
158 for (
const int obj : idToObj )
159 clusterSize[clusterFeatures[obj].cluster]++;
161 features = source->getFeatures();
173 const auto obj = idToObj.find( feat.
id() );
176 attr << QVariant() << QVariant();
180 attr << 0 << featureWithGeometryCount;
184 const int cluster = clusterFeatures[*obj].cluster;
185 attr << cluster << clusterSize[cluster];
195 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
201void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
203 const std::size_t n = points.size();
209 for (
int i = 0; i < k; i++ )
210 centers[i] = points[0].point;
214 long duplicateCount = 1;
218 double distanceP1 = 0;
219 double distanceP2 = 0;
220 double maxDistance = -1;
221 for ( std::size_t i = 1; i < n; i++ )
223 distanceP1 = points[i].point.sqrDist( points[p1].point );
224 distanceP2 = points[i].point.sqrDist( points[p2].point );
227 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
229 maxDistance = std::max( distanceP1, distanceP2 );
230 if ( distanceP1 > distanceP2 )
241 if ( feedback && duplicateCount > 1 )
243 feedback->
pushInfo( QObject::tr(
"There are at least %n duplicate input(s), the number of output clusters may be less than was requested",
nullptr, duplicateCount ) );
250 centers[0] = points[p1].point;
251 centers[1] = points[p2].point;
256 std::vector<double> distances( n );
259 for ( std::size_t j = 0; j < n; j++ )
261 distances[j] = points[j].point.sqrDist( centers[0] );
267 for (
int i = 2; i < k; i++ )
269 std::size_t candidateCenter = 0;
270 double maxDistance = std::numeric_limits<double>::lowest();
273 for ( std::size_t j = 0; j < n; j++ )
276 if ( distances[j] < 0 )
280 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
283 if ( distances[j] > maxDistance )
286 maxDistance = distances[j];
291 Q_ASSERT( maxDistance >= 0 );
294 distances[candidateCenter] = -1;
296 centers[i] = points[candidateCenter].point;
303void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
305 int converged =
false;
306 bool changed =
false;
309 std::vector<uint> weights( k );
312 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
317 findNearest( objs, centers, k, changed );
318 updateMeans( objs, centers, weights, k );
319 converged = !changed;
322 if ( !converged && feedback )
323 feedback->
reportError( QObject::tr(
"Clustering did not converge after %n iteration(s)",
nullptr, i ) );
325 feedback->
pushInfo( QObject::tr(
"Clustering converged after %n iteration(s)",
nullptr, i ) );
330void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
333 const std::size_t n = points.size();
334 for ( std::size_t i = 0; i < n; i++ )
336 Feature &point = points[i];
339 double currentDistance = point.point.sqrDist( centers[0] );
340 int currentCluster = 0;
343 for (
int cluster = 1; cluster < k; cluster++ )
345 const double distance = point.point.sqrDist( centers[cluster] );
346 if ( distance < currentDistance )
348 currentDistance = distance;
349 currentCluster = cluster;
354 if ( point.cluster != currentCluster )
357 point.cluster = currentCluster;
364void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
366 const uint n = points.size();
367 std::fill( weights.begin(), weights.end(), 0 );
368 for (
int i = 0; i < k; i++ )
370 centers[i].setX( 0.0 );
371 centers[i].setY( 0.0 );
373 for ( uint i = 0; i < n; i++ )
375 const int cluster = points[i].cluster;
376 centers[cluster] +=
QgsVector( points[i].point.x(), points[i].point.y() );
377 weights[cluster] += 1;
379 for (
int i = 0; i < k; i++ )
381 centers[i] /= weights[i];
@ VectorAnyGeometry
Any vector layer with geometry.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsGeometry centroid() const
Returns the center of mass of a geometry.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A class to represent a vector.
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)