QGIS API Documentation 3.99.0-Master (26c88405ac0)
Loading...
Searching...
No Matches
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22#include "qgsvectorlayer.h"
23
25
26QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
27{
28 return QStringLiteral( "deleteduplicategeometries" );
29}
30
31QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
32{
33 return QObject::tr( "Delete duplicate geometries" );
34}
35
36QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
37{
38 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
39}
40
41QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
42{
43 return QObject::tr( "Vector general" );
44}
45
46QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
47{
48 return QStringLiteral( "vectorgeneral" );
49}
50
51void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
52{
53 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
54 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
55
56 QgsProcessingParameterFeatureSink *duplicatesOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Duplicates" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
57 duplicatesOutput->setCreateByDefault( false );
58 addParameter( duplicatesOutput );
59
60 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
61 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
62}
63
64QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
65{
66 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
67 "so in case two features have identical geometries but different attributes, only one of "
68 "them will be added to the result layer.\n\n"
69 "Optionally, these duplicate features can be saved to a separate output for analysis." );
70}
71
72QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
73{
74 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
75}
76
77QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
78{
79 return new QgsDeleteDuplicateGeometriesAlgorithm();
80}
81
82bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
83{
84 mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
85 if ( !mSource )
86 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
87
88 return true;
89}
90
91QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
92{
93 QString destId;
94 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
95 if ( !sink )
96 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
97
98 QString dupesSinkId;
99 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupesSinkId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
100
102
103 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
104 QHash<QgsFeatureId, QgsGeometry> geometries;
105 QSet<QgsFeatureId> nullGeometryFeatures;
106 long current = 0;
107 const QgsSpatialIndex index( it, [&]( const QgsFeature &f ) -> bool {
108 if ( feedback->isCanceled() )
109 return false;
110
111 if ( !f.hasGeometry() )
112 {
113 nullGeometryFeatures.insert( f.id() );
114 }
115 else
116 {
117 geometries.insert( f.id(), f.geometry() );
118 }
119
120 // overall this loop takes about 10% of time
121 current++;
122 feedback->setProgress( 0.10 * static_cast<double>( current ) * step );
123 return true;
124 } );
125
126 QgsFeature f;
127
128 // start by assuming everything is unique, and chop away at this list
129 QHash<QgsFeatureId, QgsGeometry> uniqueFeatures = geometries;
130 QHash<QgsFeatureId, QgsGeometry> duplicateFeatures;
131 current = 0;
132 long removed = 0;
133
134 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
135 {
136 const QgsFeatureId featureId = it.key();
137 const QgsGeometry geometry = it.value();
138
139 if ( feedback->isCanceled() )
140 break;
141
142 if ( !uniqueFeatures.contains( featureId ) )
143 {
144 // feature was already marked as a duplicate
145 }
146 else
147 {
148 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
149
150 for ( const QgsFeatureId candidateId : candidates )
151 {
152 if ( candidateId == featureId )
153 continue;
154
155 if ( !uniqueFeatures.contains( candidateId ) )
156 {
157 // candidate already marked as a duplicate (not sure if this is possible,
158 // since it would mean the current feature would also have to be a duplicate!
159 // but let's be safe!)
160 continue;
161 }
162
163 const QgsGeometry candidateGeom = geometries.value( candidateId );
164 if ( geometry.isGeosEqual( candidateGeom ) )
165 {
166 // candidate is a duplicate of feature
167 uniqueFeatures.remove( candidateId );
168 if ( dupesSink )
169 {
170 duplicateFeatures.insert( candidateId, candidateGeom );
171 }
172 removed++;
173 }
174 }
175 }
176
177 current++;
178 feedback->setProgress( 0.80 * static_cast<double>( current ) * step + 10 ); // takes about 80% of time
179 }
180
181 // now, fetch all the feature attributes for the unique features only
182 // be super-smart and don't re-fetch geometries
183 QSet<QgsFeatureId> outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
184 outputFeatureIds.unite( nullGeometryFeatures );
185 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
186 const double stepTime = dupesSink ? 0.05 : 0.10;
187
189 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
190 current = 0;
191 while ( it.nextFeature( f ) )
192 {
193 if ( feedback->isCanceled() )
194 break;
195
196 // use already fetched geometry
197 if ( !nullGeometryFeatures.contains( f.id() ) )
198 {
199 f.setGeometry( uniqueFeatures.value( f.id() ) );
200 }
201 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
202 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
203
204 current++;
205 feedback->setProgress( stepTime * static_cast<double>( current ) * step + 90 ); // takes about 5%-10% of time
206 }
207
208 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
209
210 sink->finalize();
211
212 if ( dupesSink )
213 {
214 // now, fetch all the feature attributes for the duplicate features
215 QSet<QgsFeatureId> duplicateFeatureIds = qgis::listToSet( duplicateFeatures.keys() );
216 step = duplicateFeatureIds.empty() ? 1 : 100.0 / duplicateFeatureIds.size();
217
219 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
220 current = 0;
221 while ( it.nextFeature( f ) )
222 {
223 if ( feedback->isCanceled() )
224 break;
225
226 // use already fetched geometry
227 f.setGeometry( duplicateFeatures.value( f.id() ) );
228 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
229 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral( "DUPLICATES" ) ) );
230
231 current++;
232 feedback->setProgress( 0.05 * static_cast<double>( current ) * step + 95 ); // takes about 5% of time
233 }
234
235 dupesSink->finalize();
236 }
237
238 QVariantMap outputs;
239 outputs.insert( QStringLiteral( "OUTPUT" ), destId );
240 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast<long long>( removed ) );
241 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
242 if ( dupesSink )
243 {
244 outputs.insert( QStringLiteral( "DUPLICATES" ), dupesSinkId );
245 }
246 return outputs;
247}
248
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3533
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2196
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3711
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
QgsFeatureId id
Definition qgsfeature.h:66
QgsGeometry geometry
Definition qgsfeature.h:69
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
A geometry is the spatial representation of a feature.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
QList< int > QgsAttributeList
Definition qgsfield.h:28