QGIS API Documentation 4.1.0-Master (5bf3c20f3c9)
Loading...
Searching...
No Matches
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmremoveduplicatesbyattribute.cpp
3 ----------------------------------
4 begin : October 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include <QString>
21
22using namespace Qt::StringLiterals;
23
25
26QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
27{
28 return u"removeduplicatesbyattribute"_s;
29}
30
31QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
32{
33 return QObject::tr( "Delete duplicates by attribute" );
34}
35
36QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
37{
38 return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
39}
40
41QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
42{
43 return QObject::tr( "Vector general" );
44}
45
46QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
47{
48 return u"vectorgeneral"_s;
49}
50
51void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
52{
53 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
54 addParameter( new QgsProcessingParameterField( u"FIELDS"_s, QObject::tr( "Field to match duplicates by" ), QVariant(), u"INPUT"_s, Qgis::ProcessingFieldParameterDataType::Any, true ) );
55
56 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Filtered (no duplicates)" ) ) );
58 = new QgsProcessingParameterFeatureSink( u"DUPLICATES"_s, QObject::tr( "Filtered (duplicates)" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
59 failOutput->setCreateByDefault( false );
60 addParameter( failOutput );
61
62 addOutput( new QgsProcessingOutputNumber( u"RETAINED_COUNT"_s, QObject::tr( "Count of retained records" ) ) );
63 addOutput( new QgsProcessingOutputNumber( u"DUPLICATE_COUNT"_s, QObject::tr( "Count of discarded duplicate records" ) ) );
64}
65
66QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
67{
68 return QObject::tr(
69 "This algorithm removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
70 "Optionally, these duplicate records can be saved to a separate output for analysis."
71 );
72}
73
74QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
75{
76 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
77}
78
79QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
80{
81 return new QgsRemoveDuplicatesByAttributeAlgorithm();
82}
83
84QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
85{
86 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u"INPUT"_s, context ) );
87 if ( !source )
88 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
89
90 const QStringList fieldNames = parameterAsStrings( parameters, u"FIELDS"_s, context );
91
92 QgsAttributeList attributes;
93 for ( const QString &field : fieldNames )
94 {
95 const int index = source->fields().lookupField( field );
96 if ( index < 0 )
97 feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
98 else
99 attributes.append( index );
100 }
101 if ( attributes.isEmpty() )
102 throw QgsProcessingException( QObject::tr( "No input fields found" ) );
103
104
105 QString noDupeSinkId;
106 std::unique_ptr<QgsFeatureSink> noDupeSink( parameterAsSink( parameters, u"OUTPUT"_s, context, noDupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
107 if ( !noDupeSink )
108 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
109
110 QString dupeSinkId;
111 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, u"DUPLICATES"_s, context, dupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
112
113 const long count = source->featureCount();
114 const double step = count > 0 ? 100.0 / count : 1;
115 int current = 0;
116
117 long long keptCount = 0;
118 long long discardedCount = 0;
119
120 QSet<QVariantList> matched;
121
123 QgsFeature f;
124
125 QVariantList dupeKey;
126 dupeKey.reserve( attributes.size() );
127 for ( const int i : attributes )
128 {
129 ( void ) i;
130 dupeKey.append( QVariant() );
131 }
132
133 while ( it.nextFeature( f ) )
134 {
135 if ( feedback->isCanceled() )
136 {
137 break;
138 }
139
140 int i = 0;
141 for ( const int attr : attributes )
142 dupeKey[i++] = f.attribute( attr );
143
144 if ( matched.contains( dupeKey ) )
145 {
146 // duplicate
147 discardedCount++;
148 if ( dupesSink )
149 {
150 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
151 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, u"DUPLICATES"_s ) );
152 }
153 }
154 else
155 {
156 // not duplicate
157 keptCount++;
158 matched.insert( dupeKey );
159 if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
160 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, u"OUTPUT"_s ) );
161 }
162
163 feedback->setProgress( current * step );
164 current++;
165 }
166
167 if ( noDupeSink )
168 noDupeSink->finalize();
169
170 QVariantMap outputs;
171 outputs.insert( u"RETAINED_COUNT"_s, keptCount );
172 outputs.insert( u"DUPLICATE_COUNT"_s, discardedCount );
173 outputs.insert( u"OUTPUT"_s, noDupeSinkId );
174 if ( dupesSink )
175 {
176 dupesSink->finalize();
177 outputs.insert( u"DUPLICATES"_s, dupeSinkId );
178 }
179 return outputs;
180}
181
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition qgis.h:3653
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3647
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3828
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:56
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:65
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
QList< int > QgsAttributeList
Definition qgsfield.h:30