QGIS API Documentation  3.20.0-Odense (decaadbb31)
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdeleteduplicategeometries.cpp
3  -----------------------------------------
4  begin : December 2019
5  copyright : (C) 2019 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsvectorlayer.h"
20 #include "qgsgeometryengine.h"
21 
23 
24 QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
25 {
26  return QStringLiteral( "deleteduplicategeometries" );
27 }
28 
29 QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
30 {
31  return QObject::tr( "Delete duplicate geometries" );
32 }
33 
34 QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
35 {
36  return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
37 }
38 
39 QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
40 {
41  return QObject::tr( "Vector general" );
42 }
43 
44 QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
45 {
46  return QStringLiteral( "vectorgeneral" );
47 }
48 
49 void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
50 {
51  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
52  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
53  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
54  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
55 }
56 
57 QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
58 {
59  return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
60  "so in case two features have identical geometries but different attributes, only one of "
61  "them will be added to the result layer." );
62 }
63 
64 QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
65 {
66  return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
67 }
68 
69 QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
70 {
71  return new QgsDeleteDuplicateGeometriesAlgorithm();
72 }
73 
74 bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
75 {
76  mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
77  if ( !mSource )
78  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
79 
80  return true;
81 }
82 
83 QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
84 {
85  QString destId;
86  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
87  mSource->wkbType(), mSource->sourceCrs() ) );
88  if ( !sink )
89  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
90 
91  QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
92 
93  double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
94  QHash< QgsFeatureId, QgsGeometry > geometries;
95  QSet< QgsFeatureId > nullGeometryFeatures;
96  long current = 0;
97  QgsSpatialIndex index( it, [&]( const QgsFeature & f ) ->bool
98  {
99  if ( feedback->isCanceled() )
100  return false;
101 
102  if ( !f.hasGeometry() )
103  {
104  nullGeometryFeatures.insert( f.id() );
105  }
106  else
107  {
108  geometries.insert( f.id(), f.geometry() );
109  }
110 
111  // overall this loop takes about 10% of time
112  current++;
113  feedback->setProgress( 0.10 * current * step );
114  return true;
115  } );
116 
117  QgsFeature f;
118 
119  // start by assuming everything is unique, and chop away at this list
120  QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
121  current = 0;
122  long removed = 0;
123 
124  for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
125  {
126  const QgsFeatureId featureId = it.key();
127  const QgsGeometry geometry = it.value();
128 
129  if ( feedback->isCanceled() )
130  break;
131 
132  if ( !uniqueFeatures.contains( featureId ) )
133  {
134  // feature was already marked as a duplicate
135  }
136  else
137  {
138  const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
139 
140  for ( const QgsFeatureId candidateId : candidates )
141  {
142  if ( candidateId == featureId )
143  continue;
144 
145  if ( !uniqueFeatures.contains( candidateId ) )
146  {
147  // candidate already marked as a duplicate (not sure if this is possible,
148  // since it would mean the current feature would also have to be a duplicate!
149  // but let's be safe!)
150  continue;
151  }
152  else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
153  {
154  // candidate is a duplicate of feature
155  uniqueFeatures.remove( candidateId );
156  removed++;
157  }
158  }
159  }
160 
161  current++;
162  feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
163  }
164 
165  // now, fetch all the feature attributes for the unique features only
166  // be super-smart and don't re-fetch geometries
167  QSet< QgsFeatureId > outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
168  outputFeatureIds.unite( nullGeometryFeatures );
169  step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
170 
172  it = mSource->getFeatures( request );
173  current = 0;
174  while ( it.nextFeature( f ) )
175  {
176  if ( feedback->isCanceled() )
177  break;
178 
179  // use already fetched geometry
180  if ( !nullGeometryFeatures.contains( f.id() ) )
181  {
182  f.setGeometry( uniqueFeatures.value( f.id() ) );
183  }
184  sink->addFeature( f, QgsFeatureSink::FastInsert );
185 
186  current++;
187  feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
188  }
189 
190  feedback->pushInfo( QObject::tr( "%1 duplicate features removed" ).arg( removed ) );
191 
192  QVariantMap outputs;
193  outputs.insert( QStringLiteral( "OUTPUT" ), destId );
194  outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
195  outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
196  return outputs;
197 }
198 
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets feature IDs that should be fetched.
QgsFeatureRequest & setFlags(QgsFeatureRequest::Flags flags)
Sets flags that affect how features will be fetched.
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field/values attributes.
Definition: qgsfeature.h:56
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:205
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:124
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature ids; negative numbers are used for uncommitted/newly added features.
Definition: qgsfeatureid.h:28
QList< int > QgsAttributeList
Definition: qgsfield.h:26