19#include <unordered_map>
23const int KMEANS_MAX_ITERATIONS = 1000;
25QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
55 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ), QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
57 addParameter( fieldNameParam.release() );
58 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ), QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
60 addParameter( sizeFieldNameParam.release() );
65QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
67 return QObject::tr(
"This algorithm calculates the 2D distance based k-means cluster number for each input feature.\n\n"
68 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
71QString QgsKMeansClusteringAlgorithm::shortDescription()
const
73 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature." );
76QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
78 return new QgsKMeansClusteringAlgorithm();
83 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
87 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
89 QgsFields outputFields = source->fields();
91 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
92 newFields.
append(
QgsField( clusterFieldName, QMetaType::Type::Int ) );
93 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
94 newFields.
append(
QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
98 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
103 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
104 const double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
107 int featureWithGeometryCount = 0;
110 std::vector<Feature> clusterFeatures;
112 QHash<QgsFeatureId, int> idToObj;
124 featureWithGeometryCount++;
140 idToObj[feat.
id()] = clusterFeatures.size();
141 clusterFeatures.emplace_back( Feature( point ) );
146 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
152 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
155 std::vector<QgsPointXY> centers( k );
157 initClusters( clusterFeatures, centers, k, feedback );
158 calculateKMeans( clusterFeatures, centers, k, feedback );
162 std::unordered_map<int, int> clusterSize;
163 for (
const int obj : idToObj )
164 clusterSize[clusterFeatures[obj].cluster]++;
166 features = source->getFeatures();
178 const auto obj = idToObj.find( feat.
id() );
181 attr << QVariant() << QVariant();
185 attr << 0 << featureWithGeometryCount;
189 const int cluster = clusterFeatures[*obj].cluster;
190 attr << cluster << clusterSize[cluster];
200 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
206void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
208 const std::size_t n = points.size();
214 for (
int i = 0; i < k; i++ )
215 centers[i] = points[0].point;
219 long duplicateCount = 1;
223 double distanceP1 = 0;
224 double distanceP2 = 0;
225 double maxDistance = -1;
226 for ( std::size_t i = 1; i < n; i++ )
228 distanceP1 = points[i].point.sqrDist( points[p1].point );
229 distanceP2 = points[i].point.sqrDist( points[p2].point );
232 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
234 maxDistance = std::max( distanceP1, distanceP2 );
235 if ( distanceP1 > distanceP2 )
246 if ( feedback && duplicateCount > 1 )
248 feedback->
pushInfo( QObject::tr(
"There are at least %n duplicate input(s), the number of output clusters may be less than was requested",
nullptr, duplicateCount ) );
255 centers[0] = points[p1].point;
256 centers[1] = points[p2].point;
261 std::vector<double> distances( n );
264 for ( std::size_t j = 0; j < n; j++ )
266 distances[j] = points[j].point.sqrDist( centers[0] );
272 for (
int i = 2; i < k; i++ )
274 std::size_t candidateCenter = 0;
275 double maxDistance = std::numeric_limits<double>::lowest();
278 for ( std::size_t j = 0; j < n; j++ )
281 if ( distances[j] < 0 )
285 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
288 if ( distances[j] > maxDistance )
291 maxDistance = distances[j];
296 Q_ASSERT( maxDistance >= 0 );
299 distances[candidateCenter] = -1;
301 centers[i] = points[candidateCenter].point;
308void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
310 int converged =
false;
311 bool changed =
false;
314 std::vector<uint> weights( k );
317 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
322 findNearest( objs, centers, k, changed );
323 updateMeans( objs, centers, weights, k );
324 converged = !changed;
327 if ( !converged && feedback )
328 feedback->
reportError( QObject::tr(
"Clustering did not converge after %n iteration(s)",
nullptr, i ) );
330 feedback->
pushInfo( QObject::tr(
"Clustering converged after %n iteration(s)",
nullptr, i ) );
335void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
338 const std::size_t n = points.size();
339 for ( std::size_t i = 0; i < n; i++ )
341 Feature &point = points[i];
344 double currentDistance = point.point.sqrDist( centers[0] );
345 int currentCluster = 0;
348 for (
int cluster = 1; cluster < k; cluster++ )
350 const double distance = point.point.sqrDist( centers[cluster] );
351 if ( distance < currentDistance )
353 currentDistance = distance;
354 currentCluster = cluster;
359 if ( point.cluster != currentCluster )
362 point.cluster = currentCluster;
369void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
371 const uint n = points.size();
372 std::fill( weights.begin(), weights.end(), 0 );
373 for (
int i = 0; i < k; i++ )
375 centers[i].setX( 0.0 );
376 centers[i].setY( 0.0 );
378 for ( uint i = 0; i < n; i++ )
380 const int cluster = points[i].cluster;
381 centers[cluster] +=
QgsVector( points[i].point.x(), points[i].point.y() );
382 weights[cluster] += 1;
384 for (
int i = 0; i < k; i++ )
386 centers[i] /= weights[i];
@ VectorAnyGeometry
Any vector layer with geometry.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsGeometry centroid() const
Returns the center of mass of a geometry.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
Represent a 2-dimensional vector.
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)