22 const int KMEANS_MAX_ITERATIONS = 1000;
24 QString QgsKMeansClusteringAlgorithm::name()
const 26 return QStringLiteral(
"kmeansclustering" );
29 QString QgsKMeansClusteringAlgorithm::displayName()
const 31 return QObject::tr(
"K-means clustering" );
34 QStringList QgsKMeansClusteringAlgorithm::tags()
const 36 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
39 QString QgsKMeansClusteringAlgorithm::group()
const 41 return QObject::tr(
"Vector analysis" );
44 QString QgsKMeansClusteringAlgorithm::groupId()
const 46 return QStringLiteral(
"vectoranalysis" );
49 void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
56 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) ) );
60 QString QgsKMeansClusteringAlgorithm::shortHelpString()
const 62 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n" 63 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
66 QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const 68 return new QgsKMeansClusteringAlgorithm();
73 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
77 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
79 QgsFields outputFields = source->fields();
80 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
86 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
91 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
92 double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
97 std::vector< Feature > clusterFeatures;
99 QHash< QgsFeatureId, int > idToObj;
126 idToObj.insert( feat.
id(), clusterFeatures.size() );
127 clusterFeatures.emplace_back( Feature( point ) );
132 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
138 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
141 std::vector< QgsPointXY > centers( k );
143 initClusters( clusterFeatures, centers, k, feedback );
144 calculateKMeans( clusterFeatures, centers, k, feedback );
147 features = source->getFeatures();
167 else if ( !idToObj.contains( feat.
id() ) )
173 attr << clusterFeatures[ idToObj.value( feat.
id() ) ].cluster;
180 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
186 void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
188 std::size_t n = points.size();
194 for (
int i = 0; i < k; i++ )
195 centers[ i ] = points[ 0 ].point;
199 long duplicateCount = 1;
203 double distanceP1 = 0;
204 double distanceP2 = 0;
205 double maxDistance = -1;
206 for ( std::size_t i = 1; i < n; i++ )
208 distanceP1 = points[i].point.
sqrDist( points[p1].point );
209 distanceP2 = points[i].point.sqrDist( points[p2].point );
212 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
214 maxDistance = std::max( distanceP1, distanceP2 );
215 if ( distanceP1 > distanceP2 )
226 if ( feedback && duplicateCount > 1 )
228 feedback->
pushInfo( QObject::tr(
"There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
235 centers[0] = points[p1].point;
236 centers[1] = points[p2].point;
241 std::vector< double > distances( n );
244 for ( std::size_t j = 0; j < n; j++ )
246 distances[j] = points[j].point.sqrDist( centers[0] );
252 for (
int i = 2; i < k; i++ )
254 std::size_t candidateCenter = 0;
255 double maxDistance = std::numeric_limits<double>::lowest();
258 for ( std::size_t j = 0; j < n; j++ )
261 if ( distances[j] < 0 )
265 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
268 if ( distances[j] > maxDistance )
271 maxDistance = distances[j];
276 Q_ASSERT( maxDistance >= 0 );
279 distances[candidateCenter] = -1;
281 centers[i] = points[candidateCenter].point;
288 void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
290 int converged =
false;
291 bool changed =
false;
294 std::vector< uint > weights( k );
297 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
302 findNearest( objs, centers, k, changed );
303 updateMeans( objs, centers, weights, k );
304 converged = !changed;
307 if ( !converged && feedback )
308 feedback->
reportError( QObject::tr(
"Clustering did not converge after %1 iterations" ).arg( i ) );
310 feedback->
pushInfo( QObject::tr(
"Clustering converged after %1 iterations" ).arg( i ) );
315 void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
318 std::size_t n = points.size();
319 for ( std::size_t i = 0; i < n; i++ )
321 Feature &point = points[i];
324 double currentDistance = point.point.sqrDist( centers[0] );
325 int currentCluster = 0;
328 for (
int cluster = 1; cluster < k; cluster++ )
330 const double distance = point.point.sqrDist( centers[cluster] );
331 if ( distance < currentDistance )
333 currentDistance = distance;
334 currentCluster = cluster;
339 if ( point.cluster != currentCluster )
342 point.cluster = currentCluster;
349 void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
351 uint n = points.size();
352 std::fill( weights.begin(), weights.end(), 0 );
353 for (
int i = 0; i < k; i++ )
355 centers[i].setX( 0.0 );
356 centers[i].setY( 0.0 );
358 for ( uint i = 0; i < n; i++ )
360 int cluster = points[i].cluster;
361 centers[cluster] +=
QgsVector( points[i].point.x(),
362 points[i].point.y() );
363 weights[cluster] += 1;
365 for (
int i = 0; i < k; i++ )
367 centers[i] /= weights[i];
Wrapper for iterator of features from vector data provider or vector layer.
bool isCanceled() const
Tells whether the operation has been canceled already.
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Base class for providing feedback from a processing algorithm.
bool isNull() const
Returns true if the geometry is null (ie, contains no underlying geometry accessible via geometry() )...
A class to represent a 2D point.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)
void setProgress(double progress)
Sets the current progress for the feedback object.
double sqrDist(double x, double y) const
Returns the squared distance between this point a specified x, y coordinate.
Container of fields for a vector layer.
A geometry is the spatial representation of a feature.
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
A feature sink output for processing algorithms.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB)
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
This class wraps a request for features to a vector layer (or directly its vector data provider)...
Custom exception class for processing related exceptions.
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false) ...
Encapsulate a field in an attribute table or data source.
A numeric parameter for processing algorithms.
A class to represent a vector.
An input feature source (such as vector layers) parameter for processing algorithms.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool nextFeature(QgsFeature &f)
static Type flatType(Type type)
Returns the flat type for a WKB type.
Contains information about the context in which a processing algorithm is executed.
QgsWkbTypes::Type wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A string parameter for processing algorithms.
Any vector layer with geometry.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
QgsGeometry centroid() const
Returns the center of mass of a geometry.