19 #include <unordered_map> 
   23 const int KMEANS_MAX_ITERATIONS = 1000;
 
   25 QString QgsKMeansClusteringAlgorithm::name()
 const 
   27   return QStringLiteral( 
"kmeansclustering" );
 
   30 QString QgsKMeansClusteringAlgorithm::displayName()
 const 
   32   return QObject::tr( 
"K-means clustering" );
 
   35 QStringList QgsKMeansClusteringAlgorithm::tags()
 const 
   37   return QObject::tr( 
"clustering,clusters,kmeans,points" ).split( 
',' );
 
   40 QString QgsKMeansClusteringAlgorithm::group()
 const 
   42   return QObject::tr( 
"Vector analysis" );
 
   45 QString QgsKMeansClusteringAlgorithm::groupId()
 const 
   47   return QStringLiteral( 
"vectoranalysis" );
 
   50 void QgsKMeansClusteringAlgorithm::initAlgorithm( 
const QVariantMap & )
 
   57   auto fieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( 
"FIELD_NAME" ),
 
   58                         QObject::tr( 
"Cluster field name" ), QStringLiteral( 
"CLUSTER_ID" ) );
 
   60   addParameter( fieldNameParam.release() );
 
   61   auto sizeFieldNameParam = std::make_unique<QgsProcessingParameterString>( QStringLiteral( 
"SIZE_FIELD_NAME" ),
 
   62                             QObject::tr( 
"Cluster size field name" ), QStringLiteral( 
"CLUSTER_SIZE" ) );
 
   64   addParameter( sizeFieldNameParam.release() );
 
   69 QString QgsKMeansClusteringAlgorithm::shortHelpString()
 const 
   71   return QObject::tr( 
"Calculates the 2D distance based k-means cluster number for each input feature.\n\n" 
   72                       "If input geometries are lines or polygons, the clustering is based on the centroid of the feature." );
 
   75 QgsKMeansClusteringAlgorithm *QgsKMeansClusteringAlgorithm::createInstance()
 const 
   77   return new QgsKMeansClusteringAlgorithm();
 
   82   std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( 
"INPUT" ), context ) );
 
   86   int k = parameterAsInt( parameters, QStringLiteral( 
"CLUSTERS" ), context );
 
   88   QgsFields outputFields = source->fields();
 
   90   const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( 
"FIELD_NAME" ), context );
 
   92   const QString clusterSizeFieldName = parameterAsString( parameters, QStringLiteral( 
"SIZE_FIELD_NAME" ), context );
 
   93   newFields.
append( 
QgsField( clusterSizeFieldName, QVariant::Int ) );
 
   97   std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( 
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
 
  102   feedback->
pushInfo( QObject::tr( 
"Collecting input points" ) );
 
  103   double step = source->featureCount() > 0 ? 50.0 / source->featureCount() : 1;
 
  106   int featureWithGeometryCount = 0;
 
  109   std::vector< Feature > clusterFeatures;
 
  111   QHash< QgsFeatureId, int > idToObj;
 
  123     featureWithGeometryCount++;
 
  139     idToObj[ feat.
id() ] = clusterFeatures.size();
 
  140     clusterFeatures.emplace_back( Feature( point ) );
 
  145     feedback->
reportError( QObject::tr( 
"Number of geometries is less than the number of clusters requested, not all clusters will get data" ) );
 
  151     feedback->
pushInfo( QObject::tr( 
"Calculating clusters" ) );
 
  154     std::vector< QgsPointXY > centers( k );
 
  156     initClusters( clusterFeatures, centers, k, feedback );
 
  157     calculateKMeans( clusterFeatures, centers, k, feedback );
 
  161   std::unordered_map< int, int> clusterSize;
 
  162   for ( 
int obj : idToObj )
 
  163     clusterSize[ clusterFeatures[ obj ].cluster ]++;
 
  165   features = source->getFeatures();
 
  177     auto obj = idToObj.find( feat.
id() );
 
  180       attr << QVariant() << QVariant();
 
  184       attr << 0 << featureWithGeometryCount;
 
  188       int cluster = clusterFeatures[ *obj ].cluster;
 
  189       attr << cluster << clusterSize[ cluster ];
 
  196   outputs.insert( QStringLiteral( 
"OUTPUT" ), dest );
 
  202 void QgsKMeansClusteringAlgorithm::initClusters( std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, 
const int k, 
QgsProcessingFeedback *feedback )
 
  204   std::size_t n = points.size();
 
  210     for ( 
int i = 0; i < k; i++ )
 
  211       centers[ i ] = points[ 0 ].point;
 
  215   long duplicateCount = 1;
 
  219   double distanceP1 = 0;
 
  220   double distanceP2 = 0;
 
  221   double maxDistance = -1;
 
  222   for ( std::size_t i = 1; i < n; i++ )
 
  224     distanceP1 = points[i].point.sqrDist( points[p1].point );
 
  225     distanceP2 = points[i].point.sqrDist( points[p2].point );
 
  228     if ( ( distanceP1 > maxDistance ) || ( distanceP2 > maxDistance ) )
 
  230       maxDistance = std::max( distanceP1, distanceP2 );
 
  231       if ( distanceP1 > distanceP2 )
 
  242   if ( feedback && duplicateCount > 1 )
 
  244     feedback->
pushInfo( QObject::tr( 
"There are at least %1 duplicate inputs, the number of output clusters may be less than was requested" ).arg( duplicateCount ) );
 
  251   centers[0] = points[p1].point;
 
  252   centers[1] = points[p2].point;
 
  257     std::vector< double > distances( n );
 
  260     for ( std::size_t j = 0; j < n; j++ )
 
  262       distances[j] = points[j].point.sqrDist( centers[0] );
 
  268     for ( 
int i = 2; i < k; i++ )
 
  270       std::size_t candidateCenter = 0;
 
  271       double maxDistance = std::numeric_limits<double>::lowest();
 
  274       for ( std::size_t j = 0; j < n; j++ )
 
  277         if ( distances[j] < 0 )
 
  281         distances[j] = std::min( points[j].point.sqrDist( centers[i - 1] ), distances[j] );
 
  284         if ( distances[j] > maxDistance )
 
  287           maxDistance = distances[j];
 
  292       Q_ASSERT( maxDistance >= 0 );
 
  295       distances[candidateCenter] = -1;
 
  297       centers[i] = points[candidateCenter].point;
 
  304 void QgsKMeansClusteringAlgorithm::calculateKMeans( std::vector<QgsKMeansClusteringAlgorithm::Feature> &objs, std::vector<QgsPointXY> ¢ers, 
int k, 
QgsProcessingFeedback *feedback )
 
  306   int converged = 
false;
 
  307   bool changed = 
false;
 
  310   std::vector< uint > weights( k );
 
  313   for ( i = 0; i < KMEANS_MAX_ITERATIONS && !converged; i++ )
 
  318     findNearest( objs, centers, k, changed );
 
  319     updateMeans( objs, centers, weights, k );
 
  320     converged = !changed;
 
  323   if ( !converged && feedback )
 
  324     feedback->
reportError( QObject::tr( 
"Clustering did not converge after %1 iterations" ).arg( i ) );
 
  326     feedback->
pushInfo( QObject::tr( 
"Clustering converged after %1 iterations" ).arg( i ) );
 
  331 void QgsKMeansClusteringAlgorithm::findNearest( std::vector<QgsKMeansClusteringAlgorithm::Feature> &points, 
const std::vector<QgsPointXY> ¢ers, 
const int k, 
bool &changed )
 
  334   std::size_t n = points.size();
 
  335   for ( std::size_t i = 0; i < n; i++ )
 
  337     Feature &point = points[i];
 
  340     double currentDistance = point.point.sqrDist( centers[0] );
 
  341     int currentCluster = 0;
 
  344     for ( 
int cluster = 1; cluster < k; cluster++ )
 
  346       const double distance = point.point.sqrDist( centers[cluster] );
 
  347       if ( distance < currentDistance )
 
  349         currentDistance = distance;
 
  350         currentCluster = cluster;
 
  355     if ( point.cluster != currentCluster )
 
  358       point.cluster = currentCluster;
 
  365 void QgsKMeansClusteringAlgorithm::updateMeans( 
const std::vector<Feature> &points, std::vector<QgsPointXY> ¢ers, std::vector<uint> &weights, 
const int k )
 
  367   uint n = points.size();
 
  368   std::fill( weights.begin(), weights.end(), 0 );
 
  369   for ( 
int i = 0; i < k; i++ )
 
  371     centers[i].setX( 0.0 );
 
  372     centers[i].setY( 0.0 );
 
  374   for ( uint i = 0; i < n; i++ )
 
  376     int cluster = points[i].cluster;
 
  377     centers[cluster] += 
QgsVector( points[i].point.x(),
 
  378                                    points[i].point.y() );
 
  379     weights[cluster] += 1;
 
  381   for ( 
int i = 0; i < k; i++ )
 
  383     centers[i] /= weights[i];
 
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
void setProgress(double progress)
Sets the current progress for the feedback object.
Encapsulate a field in an attribute table or data source.
Container of fields for a vector layer.
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
A geometry is the spatial representation of a feature.
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsWkbTypes::Type wkbType() const SIP_HOLDGIL
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
QgsGeometry centroid() const
Returns the center of mass of a geometry.
A class to represent a 2D point.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorAnyGeometry
Any vector layer with geometry.
A class to represent a vector.
static Type flatType(Type type) SIP_HOLDGIL
Returns the flat type for a WKB type.
bool qgsDoubleNear(double a, double b, double epsilon=4 *std::numeric_limits< double >::epsilon())
Compare two doubles (but allow some difference)