QGIS API Documentation 3.99.0-Master (357b655ed83)
Loading...
Searching...
No Matches
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmremoveduplicatesbyattribute.cpp
3 ----------------------------------
4 begin : October 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include <QString>
21
22using namespace Qt::StringLiterals;
23
25
26QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
27{
28 return u"removeduplicatesbyattribute"_s;
29}
30
31QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
32{
33 return QObject::tr( "Delete duplicates by attribute" );
34}
35
36QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
37{
38 return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
39}
40
41QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
42{
43 return QObject::tr( "Vector general" );
44}
45
46QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
47{
48 return u"vectorgeneral"_s;
49}
50
51void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
52{
53 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
54 addParameter( new QgsProcessingParameterField( u"FIELDS"_s, QObject::tr( "Field to match duplicates by" ), QVariant(), u"INPUT"_s, Qgis::ProcessingFieldParameterDataType::Any, true ) );
55
56 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Filtered (no duplicates)" ) ) );
57 QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( u"DUPLICATES"_s, QObject::tr( "Filtered (duplicates)" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
58 failOutput->setCreateByDefault( false );
59 addParameter( failOutput );
60
61 addOutput( new QgsProcessingOutputNumber( u"RETAINED_COUNT"_s, QObject::tr( "Count of retained records" ) ) );
62 addOutput( new QgsProcessingOutputNumber( u"DUPLICATE_COUNT"_s, QObject::tr( "Count of discarded duplicate records" ) ) );
63}
64
65QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
66{
67 return QObject::tr( "This algorithm removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
68 "Optionally, these duplicate records can be saved to a separate output for analysis." );
69}
70
71QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
72{
73 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
74}
75
76QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
77{
78 return new QgsRemoveDuplicatesByAttributeAlgorithm();
79}
80
81QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
82{
83 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u"INPUT"_s, context ) );
84 if ( !source )
85 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
86
87 const QStringList fieldNames = parameterAsStrings( parameters, u"FIELDS"_s, context );
88
89 QgsAttributeList attributes;
90 for ( const QString &field : fieldNames )
91 {
92 const int index = source->fields().lookupField( field );
93 if ( index < 0 )
94 feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
95 else
96 attributes.append( index );
97 }
98 if ( attributes.isEmpty() )
99 throw QgsProcessingException( QObject::tr( "No input fields found" ) );
100
101
102 QString noDupeSinkId;
103 std::unique_ptr<QgsFeatureSink> noDupeSink( parameterAsSink( parameters, u"OUTPUT"_s, context, noDupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
104 if ( !noDupeSink )
105 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
106
107 QString dupeSinkId;
108 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, u"DUPLICATES"_s, context, dupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
109
110 const long count = source->featureCount();
111 const double step = count > 0 ? 100.0 / count : 1;
112 int current = 0;
113
114 long long keptCount = 0;
115 long long discardedCount = 0;
116
117 QSet<QVariantList> matched;
118
120 QgsFeature f;
121
122 QVariantList dupeKey;
123 dupeKey.reserve( attributes.size() );
124 for ( const int i : attributes )
125 {
126 ( void ) i;
127 dupeKey.append( QVariant() );
128 }
129
130 while ( it.nextFeature( f ) )
131 {
132 if ( feedback->isCanceled() )
133 {
134 break;
135 }
136
137 int i = 0;
138 for ( const int attr : attributes )
139 dupeKey[i++] = f.attribute( attr );
140
141 if ( matched.contains( dupeKey ) )
142 {
143 // duplicate
144 discardedCount++;
145 if ( dupesSink )
146 {
147 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
148 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, u"DUPLICATES"_s ) );
149 }
150 }
151 else
152 {
153 // not duplicate
154 keptCount++;
155 matched.insert( dupeKey );
156 if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
157 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, u"OUTPUT"_s ) );
158 }
159
160 feedback->setProgress( current * step );
161 current++;
162 }
163
164 if ( noDupeSink )
165 noDupeSink->finalize();
166
167 QVariantMap outputs;
168 outputs.insert( u"RETAINED_COUNT"_s, keptCount );
169 outputs.insert( u"DUPLICATE_COUNT"_s, discardedCount );
170 outputs.insert( u"OUTPUT"_s, noDupeSinkId );
171 if ( dupesSink )
172 {
173 dupesSink->finalize();
174 outputs.insert( u"DUPLICATES"_s, dupeSinkId );
175 }
176 return outputs;
177}
178
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition qgis.h:3610
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3604
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3782
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:55
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:63
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
QList< int > QgsAttributeList
Definition qgsfield.h:30