QGIS API Documentation  3.12.1-București (121cc00ff0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdeleteduplicategeometries.cpp
3  -----------------------------------------
4  begin : December 2019
5  copyright : (C) 2019 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsvectorlayer.h"
20 #include "qgsgeometryengine.h"
21 
23 
24 QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
25 {
26  return QStringLiteral( "deleteduplicategeometries" );
27 }
28 
29 QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
30 {
31  return QObject::tr( "Delete duplicate geometries" );
32 }
33 
34 QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
35 {
36  return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
37 }
38 
39 QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
40 {
41  return QObject::tr( "Vector general" );
42 }
43 
44 QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
45 {
46  return QStringLiteral( "vectorgeneral" );
47 }
48 
49 void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
50 {
51  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
52  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
53  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
54  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
55 }
56 
57 QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
58 {
59  return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
60  "so in case two features have identical geometries but different attributes, only one of "
61  "them will be added to the result layer." );
62 }
63 
64 QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
65 {
66  return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
67 }
68 
69 QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
70 {
71  return new QgsDeleteDuplicateGeometriesAlgorithm();
72 }
73 
74 bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
75 {
76  mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
77  if ( !mSource )
78  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
79 
80  return true;
81 }
82 
83 QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
84 {
85  QString destId;
86  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
87  mSource->wkbType(), mSource->sourceCrs() ) );
88  if ( !sink )
89  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
90 
91  QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
92 
93  double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
94  QHash< QgsFeatureId, QgsGeometry > geometries;
95  QSet< QgsFeatureId > nullGeometryFeatures;
96  long current = 0;
97  QgsSpatialIndex index( it, [&]( const QgsFeature & f ) ->bool
98  {
99  if ( feedback->isCanceled() )
100  return false;
101 
102  if ( !f.hasGeometry() )
103  {
104  nullGeometryFeatures.insert( f.id() );
105  }
106  else
107  {
108  geometries.insert( f.id(), f.geometry() );
109  }
110 
111  // overall this loop takes about 10% of time
112  current++;
113  feedback->setProgress( 0.10 * current * step );
114  return true;
115  } );
116 
117  QgsFeature f;
118 
119  // start by assuming everything is unique, and chop away at this list
120  QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
121  current = 0;
122  long removed = 0;
123 
124  for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
125  {
126  const QgsFeatureId featureId = it.key();
127  const QgsGeometry geometry = it.value();
128 
129  if ( feedback->isCanceled() )
130  break;
131 
132  if ( !uniqueFeatures.contains( featureId ) )
133  {
134  // feature was already marked as a duplicate
135  }
136  else
137  {
138  const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
139 
140  for ( const QgsFeatureId candidateId : candidates )
141  {
142  if ( candidateId == featureId )
143  continue;
144 
145  if ( !uniqueFeatures.contains( candidateId ) )
146  {
147  // candidate already marked as a duplicate (not sure if this is possible,
148  // since it would mean the current feature would also have to be a duplicate!
149  // but let's be safe!)
150  continue;
151  }
152  else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
153  {
154  // candidate is a duplicate of feature
155  uniqueFeatures.remove( candidateId );
156  removed++;
157  }
158  }
159  }
160 
161  current++;
162  feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
163  }
164 
165  // now, fetch all the feature attributes for the unique features only
166  // be super-smart and don't re-fetch geometries
167  QSet< QgsFeatureId > outputFeatureIds = uniqueFeatures.keys().toSet();
168  outputFeatureIds.unite( nullGeometryFeatures );
169  step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
170 
172  it = mSource->getFeatures( request );
173  current = 0;
174  while ( it.nextFeature( f ) )
175  {
176  if ( feedback->isCanceled() )
177  break;
178 
179  // use already fetched geometry
180  if ( !nullGeometryFeatures.contains( f.id() ) )
181  {
182  f.setGeometry( uniqueFeatures.value( f.id() ) );
183  }
184  sink->addFeature( f, QgsFeatureSink::FastInsert );
185 
186  current++;
187  feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
188  }
189 
190  feedback->pushInfo( QObject::tr( "%1 duplicate features removed" ).arg( removed ) );
191 
192  QVariantMap outputs;
193  outputs.insert( QStringLiteral( "OUTPUT" ), destId );
194  outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
195  outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
196  return outputs;
197 }
198 
QgsFeatureId id
Definition: qgsfeature.h:64
Wrapper for iterator of features from vector data provider or vector layer.
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
Base class for providing feedback from a processing algorithm.
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:64
qint64 QgsFeatureId
Definition: qgsfeatureid.h:25
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:122
A numeric output for processing algorithms.
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
Definition: qgsfeature.h:55
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:197
A feature sink output for processing algorithms.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
This class wraps a request for features to a vector layer (or directly its vector data provider)...
Custom exception class for processing related exceptions.
Definition: qgsexception.h:82
A spatial index for QgsFeature objects.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets feature IDs that should be fetched.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:55
An input feature source (such as vector layers) parameter for processing algorithms.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
QgsGeometry geometry
Definition: qgsfeature.h:67
QList< int > QgsAttributeList
Definition: qgsfield.h:26
bool nextFeature(QgsFeature &f)
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Contains information about the context in which a processing algorithm is executed.
QgsFeatureRequest & setFlags(QgsFeatureRequest::Flags flags)
Sets flags that affect how features will be fetched.