19 #include <unordered_map>
23 const int KMEANS_MAX_ITERATIONS = 1000;
25 QString QgsKMeansClusteringAlgorithm::name()
const
27 return QStringLiteral(
"kmeansclustering" );
30 QString QgsKMeansClusteringAlgorithm::displayName()
const
32 return QObject::tr(
"K-means clustering" );
35 QStringList QgsKMeansClusteringAlgorithm::tags()
const
37 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
40 QString QgsKMeansClusteringAlgorithm::group()
const
42 return QObject::tr(
"Vector analysis" );
45 QString QgsKMeansClusteringAlgorithm::groupId()
const
47 return QStringLiteral(
"vectoranalysis" );
50 void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
57 auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ),
58 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
60 addParameter( fieldNameParam.release() );
61 auto sizeFieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral(
"SIZE_FIELD_NAME" ),
62 QObject::tr(
"Cluster size field name" ), QStringLiteral(
"CLUSTER_SIZE" ) );
64 addParameter( sizeFieldNameParam.release() );
69 QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
71 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
72 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
75 QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
77 return new QgsKMeansClusteringAlgorithm();
82 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
86 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
88 QgsFields outputFields = source->fields();
90 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
92 const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral(
"SIZE_FIELD_NAME" ), context );
93 newFields.
append(
QgsField( clusterSizeFieldName, QVariant::Int ) );
97 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
102 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
103 double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
106 int featureWithGeometryCount = 0;
109 std::vector< Feature > clusterFeatures;
111 QHash< QgsFeatureId, int > idToObj;
123 featureWithGeometryCount++;
139 idToObj[ feat.
id() ] = clusterFeatures.size();
140 clusterFeatures.emplace_back( Feature( point ) );
145 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
151 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
154 std::vector< QgsPointXY > centers( k );
156 initClusters( clusterFeatures, centers, k, feedback );
157 calculateKMeans( clusterFeatures, centers, k, feedback );
161 std::unordered_map< int, int> clusterSize;
162 for (
int obj : idToObj )
163 clusterSize[ clusterFeatures[ obj ].cluster ]++;
165 features = source->getFeatures();
177 auto obj = idToObj.find( feat.
id() );
180 attr << QVariant() << QVariant();
184 attr << 0 << featureWithGeometryCount;
188 int cluster = clusterFeatures[ *obj ].cluster;
189 attr << cluster << clusterSize[ cluster ];
196 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
202 void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
204 std::size_t n = points.size();
210 for (
int i = 0; i < k; i++ )
211 centers[ i ] = points[ 0 ].point;
215 long duplicateCount = 1;
219 double distanceP1 = 0;
220 double distanceP2 = 0;
221 double maxDistance = -1;
222 for ( std::size_t i = 1; i < n; i++ )
224 distanceP1 = points[i].point.sqrDist( points[p1].point );
225 distanceP2 = points[i].point.sqrDist( points[p2].point );
228 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
230 maxDistance = std::max( distanceP1, distanceP2 );
231 if ( distanceP1 > distanceP2 )
242 if ( feedback && duplicateCount > 1 )
244 feedback->
pushInfo( QObject::tr(
"There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
251 centers[0] = points[p1].point;
252 centers[1] = points[p2].point;
257 std::vector< double > distances( n );
260 for ( std::size_t j = 0; j < n; j++ )
262 distances[j] = points[j].point.sqrDist( centers[0] );
268 for (
int i = 2; i < k; i++ )
270 std::size_t candidateCenter = 0;
271 double maxDistance = std::numeric_limits<double>::lowest();
274 for ( std::size_t j = 0; j < n; j++ )
277 if ( distances[j] < 0 )
281 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
284 if ( distances[j] > maxDistance )
287 maxDistance = distances[j];
292 Q_ASSERT( maxDistance >= 0 );
295 distances[candidateCenter] = -1;
297 centers[i] = points[candidateCenter].point;
304 void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
306 int converged =
false;
307 bool changed =
false;
310 std::vector< uint > weights( k );
313 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
318 findNearest( objs, centers, k, changed );
319 updateMeans( objs, centers, weights, k );
320 converged = !changed;
323 if ( !converged && feedback )
324 feedback->
reportError( QObject::tr(
"Clustering did not converge after %1 iterations" ).arg( i ) );
326 feedback->
pushInfo( QObject::tr(
"Clustering converged after %1 iterations" ).arg( i ) );
331 void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
334 std::size_t n = points.size();
335 for ( std::size_t i = 0; i < n; i++ )
337 Feature &point = points[i];
340 double currentDistance = point.point.sqrDist( centers[0] );
341 int currentCluster = 0;
344 for (
int cluster = 1; cluster < k; cluster++ )
346 const double distance = point.point.sqrDist( centers[cluster] );
347 if ( distance < currentDistance )
349 currentDistance = distance;
350 currentCluster = cluster;
355 if ( point.cluster != currentCluster )
358 point.cluster = currentCluster;
365 void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
367 uint n = points.size();
368 std::fill( weights.begin(), weights.end(), 0 );
369 for (
int i = 0; i < k; i++ )
371 centers[i].setX( 0.0 );
372 centers[i].setY( 0.0 );
374 for ( uint i = 0; i < n; i++ )
376 int cluster = points[i].cluster;
377 centers[cluster] +=
QgsVector( points[i].point.x(),
378 points[i].point.y() );
379 weights[cluster] += 1;
381 for (
int i = 0; i < k; i++ )
383 centers[i] /= weights[i];
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
QgsGeometry centroid() const
Returns the center of mass of a geometry.
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorAnyGeometry
Any vector layer with geometry.
A class to represent a vector.
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)