19 #include <unordered_map>
23 const int KMEANS_MAX_ITERATIONS = 1000;
25 QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30 QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35 QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40 QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45 QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50 void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
57 auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ),
58 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
60 addParameter( fieldNameParam.release() );
61 auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ),
62 QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
64 addParameter( sizeFieldNameParam.release() );
69 QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
71 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
72 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
75 QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
77 return new QgsKMeansClusteringAlgorithm();
82 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
86 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
88 QgsFields outputFields = source->fields();
90 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
92 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
93 newFields.
append(
QgsField( clusterSizeFieldName, QVariant::Int ) );
97 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
102 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
103 double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
106 int featureWithGeometryCount = 0;
109 std::vector< Feature > clusterFeatures;
111 QHash< QgsFeatureId, int > idToObj;
123 featureWithGeometryCount++;
139 idToObj[ feat.
id() ] = clusterFeatures.size();
140 clusterFeatures.emplace_back( Feature( point ) );
145 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
151 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
154 std::vector< QgsPointXY > centers( k );
156 initClusters( clusterFeatures, centers, k, feedback );
157 calculateKMeans( clusterFeatures, centers, k, feedback );
161 std::unordered_map< int, int> clusterSize;
162 for (
int obj : idToObj )
163 clusterSize[ clusterFeatures[ obj ].cluster ]++;
165 features = source->getFeatures();
177 auto obj = idToObj.find( feat.
id() );
180 attr << QVariant() << QVariant();
184 attr << 0 << featureWithGeometryCount;
188 int cluster = clusterFeatures[ *obj ].cluster;
189 attr << cluster << clusterSize[ cluster ];
196 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
202 void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
204 std::size_t n = points.size();
210 for (
int i = 0; i < k; i++ )
211 centers[ i ] = points[ 0 ].point;
215 long duplicateCount = 1;
219 double distanceP1 = 0;
220 double distanceP2 = 0;
221 double maxDistance = -1;
222 for ( std::size_t i = 1; i < n; i++ )
224 distanceP1 = points[i].point.sqrDist( points[p1].point );
225 distanceP2 = points[i].point.sqrDist( points[p2].point );
228 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
230 maxDistance = std::max( distanceP1, distanceP2 );
231 if ( distanceP1 > distanceP2 )
242 if ( feedback && duplicateCount > 1 )
244 feedback->
pushInfo( QObject::tr(
"There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
251 centers[0] = points[p1].point;
252 centers[1] = points[p2].point;
257 std::vector< double > distances( n );
260 for ( std::size_t j = 0; j < n; j++ )
262 distances[j] = points[j].point.sqrDist( centers[0] );
268 for (
int i = 2; i < k; i++ )
270 std::size_t candidateCenter = 0;
271 double maxDistance = std::numeric_limits<double>::lowest();
274 for ( std::size_t j = 0; j < n; j++ )
277 if ( distances[j] < 0 )
281 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
284 if ( distances[j] > maxDistance )
287 maxDistance = distances[j];
292 Q_ASSERT( maxDistance >= 0 );
295 distances[candidateCenter] = -1;
297 centers[i] = points[candidateCenter].point;
304 void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
306 int converged =
false;
307 bool changed =
false;
310 std::vector< uint > weights( k );
313 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
318 findNearest( objs, centers, k, changed );
319 updateMeans( objs, centers, weights, k );
320 converged = !changed;
323 if ( !converged && feedback )
324 feedback->
reportError( QObject::tr(
"Clustering did not converge after %1 iterations" ).arg( i ) );
326 feedback->
pushInfo( QObject::tr(
"Clustering converged after %1 iterations" ).arg( i ) );
331 void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
334 std::size_t n = points.size();
335 for ( std::size_t i = 0; i < n; i++ )
337 Feature &point = points[i];
340 double currentDistance = point.point.sqrDist( centers[0] );
341 int currentCluster = 0;
344 for (
int cluster = 1; cluster < k; cluster++ )
346 const double distance = point.point.sqrDist( centers[cluster] );
347 if ( distance < currentDistance )
349 currentDistance = distance;
350 currentCluster = cluster;
355 if ( point.cluster != currentCluster )
358 point.cluster = currentCluster;
365 void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
367 uint n = points.size();
368 std::fill( weights.begin(), weights.end(), 0 );
369 for (
int i = 0; i < k; i++ )
371 centers[i].setX( 0.0 );
372 centers[i].setY( 0.0 );
374 for ( uint i = 0; i < n; i++ )
376 int cluster = points[i].cluster;
377 centers[cluster] +=
QgsVector( points[i].point.x(),
378 points[i].point.y() );
379 weights[cluster] += 1;
381 for (
int i = 0; i < k; i++ )
383 centers[i] /= weights[i];
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
QgsGeometry centroid() const
Returns the center of mass of a geometry.
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorAnyGeometry
Any vector layer with geometry.
A class to represent a vector.
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)