QGIS API Documentation 3.41.0-Master (af5edcb665c)
Loading...
Searching...
No Matches
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdeleteduplicategeometries.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include "qgsvectorlayer.h"
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22
24
25QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
26{
27 return QStringLiteral( "deleteduplicategeometries" );
28}
29
30QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
31{
32 return QObject::tr( "Delete duplicate geometries" );
33}
34
35QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
36{
37 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
38}
39
40QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
41{
42 return QObject::tr( "Vector general" );
43}
44
45QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
46{
47 return QStringLiteral( "vectorgeneral" );
48}
49
50void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
51{
52 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
53 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
54 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
55 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
56}
57
58QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
59{
60 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
61 "so in case two features have identical geometries but different attributes, only one of "
62 "them will be added to the result layer." );
63}
64
65QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
66{
67 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
68}
69
70QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
71{
72 return new QgsDeleteDuplicateGeometriesAlgorithm();
73}
74
75bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
76{
77 mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
78 if ( !mSource )
79 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
80
81 return true;
82}
83
// Runs the algorithm in three stages:
//  1. builds a spatial index over the source features while caching every
//     geometry by feature id (and recording ids of features without geometry),
//  2. walks the cached geometries and, for each one still considered unique,
//     drops every other still-unique feature whose geometry is GEOS-equal to it,
//  3. re-reads the surviving features, re-attaches the cached geometries and
//     writes them to the OUTPUT sink.
//
// Returns a map with OUTPUT (destination id), DUPLICATE_COUNT and RETAINED_COUNT.
// Throws QgsProcessingException if the sink cannot be created or a feature
// cannot be written to it.
//
// NOTE(review): this listing skips gutter lines 91 and 170, and the variables
// `it` and `request` used below have no visible declaration - confirm both
// against the complete source file.
84QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
85{
86 QString destId;
 // the sink mirrors the source's fields, geometry type and CRS
87 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(), mSource->wkbType(), mSource->sourceCrs() ) );
88 if ( !sink )
89 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
90
 // NOTE(review): `it` (used on the index construction below) is presumably declared on the missing line 91 - confirm
92
 // progress increment per input feature; 0 when the source reports no features
93 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
 // geometry of every feature that has one, keyed by feature id
94 QHash<QgsFeatureId, QgsGeometry> geometries;
 // ids of features without geometry; these are never compared and are always retained
95 QSet<QgsFeatureId> nullGeometryFeatures;
96 long current = 0;
 // the callback is invoked with each feature read from `it`; it is used here to
 // collect geometries and report progress as a side effect of building the index
97 const QgsSpatialIndex index( it, [&]( const QgsFeature &f ) -> bool {
98 if ( feedback->isCanceled() )
 // presumably tells the index to stop reading further features - confirm
99 return false;
100
101 if ( !f.hasGeometry() )
102 {
103 nullGeometryFeatures.insert( f.id() );
104 }
105 else
106 {
107 geometries.insert( f.id(), f.geometry() );
108 }
109
110 // overall this loop takes about 10% of time
111 current++;
112 feedback->setProgress( 0.10 * current * step );
113 return true;
114 } );
115
116 QgsFeature f;
117
118 // start by assuming everything is unique, and chop away at this list
119 QHash<QgsFeatureId, QgsGeometry> uniqueFeatures = geometries;
120 current = 0;
 // number of features dropped as duplicates
121 long removed = 0;
122
 // iterate over the full (unmodified) geometry cache; of a set of equal
 // geometries, the one visited first by this loop is the one that is kept
123 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
124 {
125 const QgsFeatureId featureId = it.key();
126 const QgsGeometry geometry = it.value();
127
 // cancelling only leaves this loop; the stages below still run
128 if ( feedback->isCanceled() )
129 break;
130
131 if ( !uniqueFeatures.contains( featureId ) )
132 {
133 // feature was already marked as a duplicate
134 }
135 else
136 {
 // only features whose bounding box intersects this one need the exact comparison
137 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
138
139 for ( const QgsFeatureId candidateId : candidates )
140 {
 // the index also returns the feature itself
141 if ( candidateId == featureId )
142 continue;
143
144 if ( !uniqueFeatures.contains( candidateId ) )
145 {
146 // candidate already marked as a duplicate (not sure if this is possible,
147 // since it would mean the current feature would also have to be a duplicate!
148 // but let's be safe!)
149 continue;
150 }
151 else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
152 {
153 // candidate is a duplicate of feature
154 uniqueFeatures.remove( candidateId );
155 removed++;
156 }
157 }
158 }
159
160 current++;
161 feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
162 }
163
164 // now, fetch all the feature attributes for the unique features only
165 // be super-smart and don't re-fetch geometries
166 QSet<QgsFeatureId> outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
 // features without geometry are written to the output as well
167 outputFeatureIds.unite( nullGeometryFeatures );
 // progress increment per output feature for the final stage
168 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
169
 // NOTE(review): `request` is presumably built on the missing line 170 (likely restricted
 // to outputFeatureIds and fetched without geometry) - confirm against the full source
171 it = mSource->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
172 current = 0;
173 while ( it.nextFeature( f ) )
174 {
175 if ( feedback->isCanceled() )
176 break;
177
178 // use already fetched geometry
179 if ( !nullGeometryFeatures.contains( f.id() ) )
180 {
181 f.setGeometry( uniqueFeatures.value( f.id() ) );
182 }
183 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
184 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
185
186 current++;
187 feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
188 }
189
 // the count is passed as the plural argument of tr() to fill in %n
190 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
191
192 sink->finalize();
193
194 QVariantMap outputs;
195 outputs.insert( QStringLiteral( "OUTPUT" ), destId );
196 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast<long long>( removed ) );
 // retained count = unique geometries plus features without geometry
197 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
198 return outputs;
199}
200
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
QgsFeatureId id
Definition qgsfeature.h:66
QgsGeometry geometry
Definition qgsfeature.h:69
bool hasGeometry() const
Returns true if the feature has an associated geometry.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
A geometry is the spatial representation of a feature.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
QList< int > QgsAttributeList
Definition qgsfield.h:27