22 const int KMEANS_MAX_ITERATIONS = 1000;
24 QString QgsKMeansClusteringAlgorithm::name()
const
26 return QStringLiteral(
"kmeansclustering" );
29 QString QgsKMeansClusteringAlgorithm::displayName()
const
31 return QObject::tr(
"K-means clustering" );
34 QStringList QgsKMeansClusteringAlgorithm::tags()
const
36 return QObject::tr(
"clustering,clusters,kmeans,points" ).split(
',' );
39 QString QgsKMeansClusteringAlgorithm::group()
const
41 return QObject::tr(
"Vector analysis" );
44 QString QgsKMeansClusteringAlgorithm::groupId()
const
46 return QStringLiteral(
"vectoranalysis" );
49 void QgsKMeansClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
56 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) ) );
60 QString QgsKMeansClusteringAlgorithm::shortHelpString()
const
62 return QObject::tr(
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n"
63 "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
66 QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
const
68 return new QgsKMeansClusteringAlgorithm();
73 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
77 int k = parameterAsInt( parameters, QStringLiteral(
"CLUSTERS" ), context );
79 QgsFields outputFields = source->fields();
80 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
86 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
91 feedback->
pushInfo( QObject::tr(
"Collecting input points" ) );
92 double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
97 std::vector< Feature > clusterFeatures;
99 QHash< QgsFeatureId, int > idToObj;
126 idToObj.insert( feat.
id(), clusterFeatures.size() );
127 clusterFeatures.emplace_back( Feature( point ) );
132 feedback->
reportError( QObject::tr(
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
138 feedback->
pushInfo( QObject::tr(
"Calculating clusters" ) );
141 std::vector< QgsPointXY > centers( k );
143 initClusters( clusterFeatures, centers, k, feedback );
144 calculateKMeans( clusterFeatures, centers, k, feedback );
147 features = source->getFeatures();
167 else if ( !idToObj.contains( feat.
id() ) )
173 attr << clusterFeatures[ idToObj.value( feat.
id() ) ].cluster;
180 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
186 void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers,
const int k,
QgsProcessingFeedback *feedback )
188 std::size_t n = points.size();
194 for (
int i = 0; i < k; i++ )
195 centers[ i ] = points[ 0 ].point;
199 long duplicateCount = 1;
203 double distanceP1 = 0;
204 double distanceP2 = 0;
205 double maxDistance = -1;
206 for ( std::size_t i = 1; i < n; i++ )
208 distanceP1 = points[i].point.sqrDist( points[p1].point );
209 distanceP2 = points[i].point.sqrDist( points[p2].point );
212 if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
214 maxDistance = std::max( distanceP1, distanceP2 );
215 if ( distanceP1 > distanceP2 )
226 if ( feedback && duplicateCount > 1 )
228 feedback->
pushInfo( QObject::tr(
"There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
235 centers[0] = points[p1].point;
236 centers[1] = points[p2].point;
241 std::vector< double > distances( n );
244 for ( std::size_t j = 0; j < n; j++ )
246 distances[j] = points[j].point.sqrDist( centers[0] );
252 for (
int i = 2; i < k; i++ )
254 std::size_t candidateCenter = 0;
255 double maxDistance = std::numeric_limits<double>::lowest();
258 for ( std::size_t j = 0; j < n; j++ )
261 if ( distances[j] < 0 )
265 distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
268 if ( distances[j] > maxDistance )
271 maxDistance = distances[j];
276 Q_ASSERT( maxDistance >= 0 );
279 distances[candidateCenter] = -1;
281 centers[i] = points[candidateCenter].point;
288 void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers,
int k,
QgsProcessingFeedback *feedback )
290 int converged =
false;
291 bool changed =
false;
294 std::vector< uint > weights( k );
297 for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
302 findNearest( objs, centers, k, changed );
303 updateMeans( objs, centers, weights, k );
304 converged = !changed;
307 if ( !converged && feedback )
308 feedback->
reportError( QObject::tr(
"Clustering did not converge after %1 iterations" ).arg( i ) );
310 feedback->
pushInfo( QObject::tr(
"Clustering converged after %1 iterations" ).arg( i ) );
315 void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points,
const std::vector<QgsPointXY> ¢ers,
const int k,
bool &changed )
318 std::size_t n = points.size();
319 for ( std::size_t i = 0; i < n; i++ )
321 Feature &point = points[i];
324 double currentDistance = point.point.sqrDist( centers[0] );
325 int currentCluster = 0;
328 for (
int cluster = 1; cluster < k; cluster++ )
330 const double distance = point.point.sqrDist( centers[cluster] );
331 if ( distance < currentDistance )
333 currentDistance = distance;
334 currentCluster = cluster;
339 if ( point.cluster != currentCluster )
342 point.cluster = currentCluster;
349 void QgsKMeansClusteringAlgorithm::updateMeans(
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights,
const int k )
351 uint n = points.size();
352 std::fill( weights.begin(), weights.end(), 0 );
353 for (
int i = 0; i < k; i++ )
355 centers[i].setX( 0.0 );
356 centers[i].setY( 0.0 );
358 for ( uint i = 0; i < n; i++ )
360 int cluster = points[i].cluster;
361 centers[cluster] +=
QgsVector( points[i].point.x(),
362 points[i].point.y() );
363 weights[cluster] += 1;
365 for (
int i = 0; i < k; i++ )
367 centers[i] /= weights[i];