QGIS API Documentation 4.1.0-Master (5bf3c20f3c9)
Loading...
Searching...
No Matches
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22#include "qgsvectorlayer.h"
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
29
30QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
31{
32 return u"deleteduplicategeometries"_s;
33}
34
35QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
36{
37 return QObject::tr( "Delete duplicate geometries" );
38}
39
40QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
41{
42 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
43}
44
45QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
46{
47 return QObject::tr( "Vector general" );
48}
49
50QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
51{
52 return u"vectorgeneral"_s;
53}
54
55void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
56{
57 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ) ) );
58 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Cleaned" ) ) );
59
60 QgsProcessingParameterFeatureSink *duplicatesOutput = new QgsProcessingParameterFeatureSink( u"DUPLICATES"_s, QObject::tr( "Duplicates" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
61 duplicatesOutput->setCreateByDefault( false );
62 addParameter( duplicatesOutput );
63
64 addOutput( new QgsProcessingOutputNumber( u"RETAINED_COUNT"_s, QObject::tr( "Count of retained records" ) ) );
65 addOutput( new QgsProcessingOutputNumber( u"DUPLICATE_COUNT"_s, QObject::tr( "Count of discarded duplicate records" ) ) );
66}
67
68QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
69{
70 return QObject::tr(
71 "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
72 "so in case two features have identical geometries but different attributes, only one of "
73 "them will be added to the result layer.\n\n"
74 "Optionally, these duplicate features can be saved to a separate output for analysis."
75 );
76}
77
78QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
79{
80 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
81}
82
83QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
84{
85 return new QgsDeleteDuplicateGeometriesAlgorithm();
86}
87
88bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
89{
90 mSource.reset( parameterAsSource( parameters, u"INPUT"_s, context ) );
91 if ( !mSource )
92 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
93
94 return true;
95}
96
97QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
98{
99 QString destId;
100 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, u"OUTPUT"_s, context, destId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
101 if ( !sink )
102 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
103
104 QString dupesSinkId;
105 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, u"DUPLICATES"_s, context, dupesSinkId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
106
108
109 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
110 QHash<QgsFeatureId, QgsGeometry> geometries;
111 QSet<QgsFeatureId> nullGeometryFeatures;
112 long current = 0;
113 const QgsSpatialIndex index( it, [&]( const QgsFeature &f ) -> bool {
114 if ( feedback->isCanceled() )
115 return false;
116
117 if ( !f.hasGeometry() )
118 {
119 nullGeometryFeatures.insert( f.id() );
120 }
121 else
122 {
123 geometries.insert( f.id(), f.geometry() );
124 }
125
126 // overall this loop takes about 10% of time
127 current++;
128 feedback->setProgress( 0.10 * static_cast<double>( current ) * step );
129 return true;
130 } );
131
132 QgsFeature f;
133
134 // start by assuming everything is unique, and chop away at this list
135 QHash<QgsFeatureId, QgsGeometry> uniqueFeatures = geometries;
136 QHash<QgsFeatureId, QgsGeometry> duplicateFeatures;
137 current = 0;
138 long removed = 0;
139
140 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
141 {
142 const QgsFeatureId featureId = it.key();
143 const QgsGeometry geometry = it.value();
144
145 if ( feedback->isCanceled() )
146 break;
147
148 if ( !uniqueFeatures.contains( featureId ) )
149 {
150 // feature was already marked as a duplicate
151 }
152 else
153 {
154 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
155
156 for ( const QgsFeatureId candidateId : candidates )
157 {
158 if ( candidateId == featureId )
159 continue;
160
161 if ( !uniqueFeatures.contains( candidateId ) )
162 {
163 // candidate already marked as a duplicate (not sure if this is possible,
164 // since it would mean the current feature would also have to be a duplicate!
165 // but let's be safe!)
166 continue;
167 }
168
169 const QgsGeometry candidateGeom = geometries.value( candidateId );
170 if ( geometry.isGeosEqual( candidateGeom ) )
171 {
172 // candidate is a duplicate of feature
173 uniqueFeatures.remove( candidateId );
174 if ( dupesSink )
175 {
176 duplicateFeatures.insert( candidateId, candidateGeom );
177 }
178 removed++;
179 }
180 }
181 }
182
183 current++;
184 feedback->setProgress( 0.80 * static_cast<double>( current ) * step + 10 ); // takes about 80% of time
185 }
186
187 // now, fetch all the feature attributes for the unique features only
188 // be super-smart and don't re-fetch geometries
189 QSet<QgsFeatureId> outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
190 outputFeatureIds.unite( nullGeometryFeatures );
191 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
192 const double stepTime = dupesSink ? 0.05 : 0.10;
193
195 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
196 current = 0;
197 while ( it.nextFeature( f ) )
198 {
199 if ( feedback->isCanceled() )
200 break;
201
202 // use already fetched geometry
203 if ( !nullGeometryFeatures.contains( f.id() ) )
204 {
205 f.setGeometry( uniqueFeatures.value( f.id() ) );
206 }
207 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
208 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, u"OUTPUT"_s ) );
209
210 current++;
211 feedback->setProgress( stepTime * static_cast<double>( current ) * step + 90 ); // takes about 5%-10% of time
212 }
213
214 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
215
216 sink->finalize();
217
218 if ( dupesSink )
219 {
220 // now, fetch all the feature attributes for the duplicate features
221 QSet<QgsFeatureId> duplicateFeatureIds = qgis::listToSet( duplicateFeatures.keys() );
222 step = duplicateFeatureIds.empty() ? 1 : 100.0 / duplicateFeatureIds.size();
223
225 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
226 current = 0;
227 while ( it.nextFeature( f ) )
228 {
229 if ( feedback->isCanceled() )
230 break;
231
232 // use already fetched geometry
233 f.setGeometry( duplicateFeatures.value( f.id() ) );
234 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
235 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, u"DUPLICATES"_s ) );
236
237 current++;
238 feedback->setProgress( 0.05 * static_cast<double>( current ) * step + 95 ); // takes about 5% of time
239 }
240
241 dupesSink->finalize();
242 }
243
244 QVariantMap outputs;
245 outputs.insert( u"OUTPUT"_s, destId );
246 outputs.insert( u"DUPLICATE_COUNT"_s, static_cast<long long>( removed ) );
247 outputs.insert( u"RETAINED_COUNT"_s, outputFeatureIds.size() );
248 if ( dupesSink )
249 {
250 outputs.insert( u"DUPLICATES"_s, dupesSinkId );
251 }
252 return outputs;
253}
254
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3647
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2276
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3828
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
QgsFeatureId id
Definition qgsfeature.h:68
QgsGeometry geometry
Definition qgsfeature.h:71
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:56
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:65
A geometry is the spatial representation of a feature.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
QList< int > QgsAttributeList
Definition qgsfield.h:30