QGIS API Documentation 3.99.0-Master (09f76ad7019)
Loading...
Searching...
No Matches
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22#include "qgsvectorlayer.h"
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
29
30QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
31{
32 return u"deleteduplicategeometries"_s;
33}
34
35QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
36{
37 return QObject::tr( "Delete duplicate geometries" );
38}
39
40QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
41{
42 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
43}
44
45QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
46{
47 return QObject::tr( "Vector general" );
48}
49
50QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
51{
52 return u"vectorgeneral"_s;
53}
54
55void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
56{
57 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ) ) );
58 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Cleaned" ) ) );
59
60 QgsProcessingParameterFeatureSink *duplicatesOutput = new QgsProcessingParameterFeatureSink( u"DUPLICATES"_s, QObject::tr( "Duplicates" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
61 duplicatesOutput->setCreateByDefault( false );
62 addParameter( duplicatesOutput );
63
64 addOutput( new QgsProcessingOutputNumber( u"RETAINED_COUNT"_s, QObject::tr( "Count of retained records" ) ) );
65 addOutput( new QgsProcessingOutputNumber( u"DUPLICATE_COUNT"_s, QObject::tr( "Count of discarded duplicate records" ) ) );
66}
67
68QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
69{
70 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
71 "so in case two features have identical geometries but different attributes, only one of "
72 "them will be added to the result layer.\n\n"
73 "Optionally, these duplicate features can be saved to a separate output for analysis." );
74}
75
76QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
77{
78 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
79}
80
81QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
82{
83 return new QgsDeleteDuplicateGeometriesAlgorithm();
84}
85
86bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
87{
88 mSource.reset( parameterAsSource( parameters, u"INPUT"_s, context ) );
89 if ( !mSource )
90 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
91
92 return true;
93}
94
95QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
96{
97 QString destId;
98 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, u"OUTPUT"_s, context, destId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
99 if ( !sink )
100 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
101
102 QString dupesSinkId;
103 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, u"DUPLICATES"_s, context, dupesSinkId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
104
106
107 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
108 QHash<QgsFeatureId, QgsGeometry> geometries;
109 QSet<QgsFeatureId> nullGeometryFeatures;
110 long current = 0;
111 const QgsSpatialIndex index( it, [&]( const QgsFeature &f ) -> bool {
112 if ( feedback->isCanceled() )
113 return false;
114
115 if ( !f.hasGeometry() )
116 {
117 nullGeometryFeatures.insert( f.id() );
118 }
119 else
120 {
121 geometries.insert( f.id(), f.geometry() );
122 }
123
124 // overall this loop takes about 10% of time
125 current++;
126 feedback->setProgress( 0.10 * static_cast<double>( current ) * step );
127 return true;
128 } );
129
130 QgsFeature f;
131
132 // start by assuming everything is unique, and chop away at this list
133 QHash<QgsFeatureId, QgsGeometry> uniqueFeatures = geometries;
134 QHash<QgsFeatureId, QgsGeometry> duplicateFeatures;
135 current = 0;
136 long removed = 0;
137
138 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
139 {
140 const QgsFeatureId featureId = it.key();
141 const QgsGeometry geometry = it.value();
142
143 if ( feedback->isCanceled() )
144 break;
145
146 if ( !uniqueFeatures.contains( featureId ) )
147 {
148 // feature was already marked as a duplicate
149 }
150 else
151 {
152 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
153
154 for ( const QgsFeatureId candidateId : candidates )
155 {
156 if ( candidateId == featureId )
157 continue;
158
159 if ( !uniqueFeatures.contains( candidateId ) )
160 {
161 // candidate already marked as a duplicate (not sure if this is possible,
162 // since it would mean the current feature would also have to be a duplicate!
163 // but let's be safe!)
164 continue;
165 }
166
167 const QgsGeometry candidateGeom = geometries.value( candidateId );
168 if ( geometry.isGeosEqual( candidateGeom ) )
169 {
170 // candidate is a duplicate of feature
171 uniqueFeatures.remove( candidateId );
172 if ( dupesSink )
173 {
174 duplicateFeatures.insert( candidateId, candidateGeom );
175 }
176 removed++;
177 }
178 }
179 }
180
181 current++;
182 feedback->setProgress( 0.80 * static_cast<double>( current ) * step + 10 ); // takes about 80% of time
183 }
184
185 // now, fetch all the feature attributes for the unique features only
186 // be super-smart and don't re-fetch geometries
187 QSet<QgsFeatureId> outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
188 outputFeatureIds.unite( nullGeometryFeatures );
189 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
190 const double stepTime = dupesSink ? 0.05 : 0.10;
191
193 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
194 current = 0;
195 while ( it.nextFeature( f ) )
196 {
197 if ( feedback->isCanceled() )
198 break;
199
200 // use already fetched geometry
201 if ( !nullGeometryFeatures.contains( f.id() ) )
202 {
203 f.setGeometry( uniqueFeatures.value( f.id() ) );
204 }
205 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
206 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, u"OUTPUT"_s ) );
207
208 current++;
209 feedback->setProgress( stepTime * static_cast<double>( current ) * step + 90 ); // takes about 5%-10% of time
210 }
211
212 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
213
214 sink->finalize();
215
216 if ( dupesSink )
217 {
218 // now, fetch all the feature attributes for the duplicate features
219 QSet<QgsFeatureId> duplicateFeatureIds = qgis::listToSet( duplicateFeatures.keys() );
220 step = duplicateFeatureIds.empty() ? 1 : 100.0 / duplicateFeatureIds.size();
221
223 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
224 current = 0;
225 while ( it.nextFeature( f ) )
226 {
227 if ( feedback->isCanceled() )
228 break;
229
230 // use already fetched geometry
231 f.setGeometry( duplicateFeatures.value( f.id() ) );
232 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
233 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, u"DUPLICATES"_s ) );
234
235 current++;
236 feedback->setProgress( 0.05 * static_cast<double>( current ) * step + 95 ); // takes about 5% of time
237 }
238
239 dupesSink->finalize();
240 }
241
242 QVariantMap outputs;
243 outputs.insert( u"OUTPUT"_s, destId );
244 outputs.insert( u"DUPLICATE_COUNT"_s, static_cast<long long>( removed ) );
245 outputs.insert( u"RETAINED_COUNT"_s, outputFeatureIds.size() );
246 if ( dupesSink )
247 {
248 outputs.insert( u"DUPLICATES"_s, dupesSinkId );
249 }
250 return outputs;
251}
252
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3604
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2254
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3782
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
QgsFeatureId id
Definition qgsfeature.h:68
QgsGeometry geometry
Definition qgsfeature.h:71
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:55
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:63
A geometry is the spatial representation of a feature.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
QList< int > QgsAttributeList
Definition qgsfield.h:30