QGIS API Documentation 3.28.0-Firenze (ed3ad0430f)
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include "qgsvectorlayer.h"
20#include "qgsgeometryengine.h"
21
23
24QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
25{
26 return QStringLiteral( "deleteduplicategeometries" );
27}
28
29QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
30{
31 return QObject::tr( "Delete duplicate geometries" );
32}
33
34QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
35{
36 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
37}
38
39QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
40{
41 return QObject::tr( "Vector general" );
42}
43
44QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
45{
46 return QStringLiteral( "vectorgeneral" );
47}
48
49void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
50{
51 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
52 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
53 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
54 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
55}
56
57QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
58{
59 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
60 "so in case two features have identical geometries but different attributes, only one of "
61 "them will be added to the result layer." );
62}
63
64QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
65{
66 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
67}
68
69QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
70{
71 return new QgsDeleteDuplicateGeometriesAlgorithm();
72}
73
74bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
75{
76 mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
77 if ( !mSource )
78 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
79
80 return true;
81}
82
83QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
84{
85 QString destId;
86 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
87 mSource->wkbType(), mSource->sourceCrs() ) );
88 if ( !sink )
89 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
90
91 QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
92
93 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
94 QHash< QgsFeatureId, QgsGeometry > geometries;
95 QSet< QgsFeatureId > nullGeometryFeatures;
96 long current = 0;
97 const QgsSpatialIndex index( it, [&]( const QgsFeature & f ) ->bool
98 {
99 if ( feedback->isCanceled() )
100 return false;
101
102 if ( !f.hasGeometry() )
103 {
104 nullGeometryFeatures.insert( f.id() );
105 }
106 else
107 {
108 geometries.insert( f.id(), f.geometry() );
109 }
110
111 // overall this loop takes about 10% of time
112 current++;
113 feedback->setProgress( 0.10 * current * step );
114 return true;
115 } );
116
117 QgsFeature f;
118
119 // start by assuming everything is unique, and chop away at this list
120 QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
121 current = 0;
122 long removed = 0;
123
124 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
125 {
126 const QgsFeatureId featureId = it.key();
127 const QgsGeometry geometry = it.value();
128
129 if ( feedback->isCanceled() )
130 break;
131
132 if ( !uniqueFeatures.contains( featureId ) )
133 {
134 // feature was already marked as a duplicate
135 }
136 else
137 {
138 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
139
140 for ( const QgsFeatureId candidateId : candidates )
141 {
142 if ( candidateId == featureId )
143 continue;
144
145 if ( !uniqueFeatures.contains( candidateId ) )
146 {
147 // candidate already marked as a duplicate (not sure if this is possible,
148 // since it would mean the current feature would also have to be a duplicate!
149 // but let's be safe!)
150 continue;
151 }
152 else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
153 {
154 // candidate is a duplicate of feature
155 uniqueFeatures.remove( candidateId );
156 removed++;
157 }
158 }
159 }
160
161 current++;
162 feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
163 }
164
165 // now, fetch all the feature attributes for the unique features only
166 // be super-smart and don't re-fetch geometries
167 QSet< QgsFeatureId > outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
168 outputFeatureIds.unite( nullGeometryFeatures );
169 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
170
172 it = mSource->getFeatures( request );
173 current = 0;
174 while ( it.nextFeature( f ) )
175 {
176 if ( feedback->isCanceled() )
177 break;
178
179 // use already fetched geometry
180 if ( !nullGeometryFeatures.contains( f.id() ) )
181 {
182 f.setGeometry( uniqueFeatures.value( f.id() ) );
183 }
184 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
185 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
186
187 current++;
188 feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
189 }
190
191 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
192
193 QVariantMap outputs;
194 outputs.insert( QStringLiteral( "OUTPUT" ), destId );
195 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
196 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
197 return outputs;
198}
199
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
QgsFeatureRequest & setFlags(QgsFeatureRequest::Flags flags)
Sets flags that affect how features will be fetched.
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field/values attributes.
Definition: qgsfeature.h:56
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:233
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:164
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
Definition: qgsfeatureid.h:28
QList< int > QgsAttributeList
Definition: qgsfield.h:26