19#include <unordered_map>
23const int KMEANS_MAX_ITERATIONS = 1000;
25QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
57 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ),
58 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
60 addParameter( fieldNameParam.release() );
61 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ),
62 QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
64 addParameter( sizeFieldNameParam.release() );
69QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
71 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
72 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
75QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
77 return new QgsKMeansClusteringAlgorithm();
82 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
86 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
88 QgsFields outputFields = source->fields();
90 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
91 newFields.
append(
QgsField( clusterFieldName, QMetaType::Type::Int ) );
92 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
93 newFields.
append(
QgsField( clusterSizeFieldName, QMetaType::Type::Int ) );
97 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
102 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
103 const double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
106 int featureWithGeometryCount = 0;
109 std::vector< Feature > clusterFeatures;
111 QHash< QgsFeatureId, int > idToObj;
123 featureWithGeometryCount++;
131 if ( centroid.isNull() )
134 point =
QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( centroid.constGet() ) );
139 idToObj[ feat.
id() ] = clusterFeatures.size();
140 clusterFeatures.emplace_back( Feature( point ) );
145 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
151 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
154 std::vector< QgsPointXY > centers( k );
156 initClusters( clusterFeatures, centers, k, feedback );
157 calculateKMeans( clusterFeatures, centers, k, feedback );
161 std::unordered_map< int, int> clusterSize;
162 for (
const int obj : idToObj )
163 clusterSize[ clusterFeatures[ obj ].cluster ]++;
165 features = source->getFeatures();
177 const auto obj = idToObj.find( feat.
id() );
180 attr << QVariant() << QVariant();
184 attr << 0 << featureWithGeometryCount;
188 const int cluster = clusterFeatures[ *obj ].cluster;
189 attr << cluster << clusterSize[ cluster ];
197 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
203void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
205 const std::size_t n = points.size();
211 for (
int i = 0; i < k; i++ )
212 centers[ i ] = points[ 0 ].point;
216 long duplicateCount = 1;
220 double distanceP1 = 0;
221 double distanceP2 = 0;
222 double maxDistance = -1;
223 for ( std::size_t i = 1; i < n; i++ )
225 distanceP1 = points[i].point.sqrDist( points[p1].point );
226 distanceP2 = points[i].point.sqrDist( points[p2].point );
229 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
231 maxDistance = std::max( distanceP1, distanceP2 );
232 if ( distanceP1 > distanceP2 )
243 if ( feedback && duplicateCount > 1 )
245 feedback->
pushInfo( QObject::tr(
"There are at least %n duplicate input(s), the number of output clusters may be less than was requested",
nullptr, duplicateCount ) );
252 centers[0] = points[p1].point;
253 centers[1] = points[p2].point;
258 std::vector< double > distances( n );
261 for ( std::size_t j = 0; j < n; j++ )
263 distances[j] = points[j].point.sqrDist( centers[0] );
269 for (
int i = 2; i < k; i++ )
271 std::size_t candidateCenter = 0;
272 double maxDistance = std::numeric_limits<double>::lowest();
275 for ( std::size_t j = 0; j < n; j++ )
278 if ( distances[j] < 0 )
282 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
285 if ( distances[j] > maxDistance )
288 maxDistance = distances[j];
293 Q_ASSERT( maxDistance >= 0 );
296 distances[candidateCenter] = -1;
298 centers[i] = points[candidateCenter].point;
305void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
307 int converged =
false;
308 bool changed =
false;
311 std::vector< uint > weights( k );
314 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
319 findNearest( objs, centers, k, changed );
320 updateMeans( objs, centers, weights, k );
321 converged = !changed;
324 if ( !converged && feedback )
325 feedback->
reportError( QObject::tr(
"Clustering did not converge after %n iteration(s)",
nullptr, i ) );
327 feedback->
pushInfo( QObject::tr(
"Clustering converged after %n iteration(s)",
nullptr, i ) );
332void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
335 const std::size_t n = points.size();
336 for ( std::size_t i = 0; i < n; i++ )
338 Feature &point = points[i];
341 double currentDistance = point.point.sqrDist( centers[0] );
342 int currentCluster = 0;
345 for (
int cluster = 1; cluster < k; cluster++ )
347 const double distance = point.point.sqrDist( centers[cluster] );
348 if ( distance < currentDistance )
350 currentDistance = distance;
351 currentCluster = cluster;
356 if ( point.cluster != currentCluster )
359 point.cluster = currentCluster;
366void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
368 const uint n = points.size();
369 std::fill( weights.begin(), weights.end(), 0 );
370 for (
int i = 0; i < k; i++ )
372 centers[i].setX( 0.0 );
373 centers[i].setY( 0.0 );
375 for ( uint i = 0; i < n; i++ )
377 const int cluster = points[i].cluster;
378 centers[cluster] +=
QgsVector( points[i].point.x(),
379 points[i].point.y() );
380 weights[cluster] += 1;
382 for (
int i = 0; i < k; i++ )
384 centers[i] /= weights[i];
@ VectorAnyGeometry
Any vector layer with geometry.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, Qgis::FieldOrigin origin=Qgis::FieldOrigin::Provider, int originIndex=-1)
Appends a field.
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsGeometry centroid() const
Returns the center of mass of a geometry.
Qgis::WkbType wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A class to represent a vector.
static Qgis::WkbType flatType(Qgis::WkbType type)
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)