QGIS API Documentation 3.28.0-Firenze (ed3ad0430f)
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmremoveduplicatesbyattribute.cpp
3 ----------------------------------
4 begin : October 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
21
22QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
23{
24 return QStringLiteral( "removeduplicatesbyattribute" );
25}
26
27QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
28{
29 return QObject::tr( "Delete duplicates by attribute" );
30}
31
32QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
33{
34 return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
35}
36
37QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
38{
39 return QObject::tr( "Vector general" );
40}
41
42QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
43{
44 return QStringLiteral( "vectorgeneral" );
45}
46
47void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
48{
49 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ),
50 QList< int >() << QgsProcessing::TypeVector ) );
51 addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS" ), QObject::tr( "Field to match duplicates by" ), QVariant(), QStringLiteral( "INPUT" ), QgsProcessingParameterField::Any, true ) );
52
53 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Filtered (no duplicates)" ) ) );
54 QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Filtered (duplicates)" ),
55 QgsProcessing::TypeVectorAnyGeometry, QVariant(), true );
56 failOutput->setCreateByDefault( false );
57 addParameter( failOutput );
58
59 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
60 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
61}
62
63QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
64{
65 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
66 "Optionally, these duplicate records can be saved to a separate output for analysis." );
67}
68
69QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
70{
71 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
72}
73
74QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
75{
76 return new QgsRemoveDuplicatesByAttributeAlgorithm();
77}
78
79QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
80{
81 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
82 if ( !source )
83 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
84
85 const QStringList fieldNames = parameterAsFields( parameters, QStringLiteral( "FIELDS" ), context );
86
87 QgsAttributeList attributes;
88 for ( const QString &field : fieldNames )
89 {
90 const int index = source->fields().lookupField( field );
91 if ( index < 0 )
92 feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
93 else
94 attributes.append( index );
95 }
96 if ( attributes.isEmpty() )
97 throw QgsProcessingException( QObject::tr( "No input fields found" ) );
98
99
100 QString noDupeSinkId;
101 std::unique_ptr< QgsFeatureSink > noDupeSink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, noDupeSinkId, source->fields(),
102 source->wkbType(), source->sourceCrs() ) );
103 if ( !noDupeSink )
104 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
105
106 QString dupeSinkId;
107 std::unique_ptr< QgsFeatureSink > dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupeSinkId, source->fields(),
108 source->wkbType(), source->sourceCrs() ) );
109
110 const long count = source->featureCount();
111 const double step = count > 0 ? 100.0 / count : 1;
112 int current = 0;
113
114 long long keptCount = 0;
115 long long discardedCount = 0;
116
117 QSet< QVariantList > matched;
118
120 QgsFeature f;
121
122 QVariantList dupeKey;
123 dupeKey.reserve( attributes.size() );
124 for ( const int i : attributes )
125 {
126 ( void )i;
127 dupeKey.append( QVariant() );
128 }
129
130 while ( it.nextFeature( f ) )
131 {
132 if ( feedback->isCanceled() )
133 {
134 break;
135 }
136
137 int i = 0;
138 for ( const int attr : attributes )
139 dupeKey[i++] = f.attribute( attr );
140
141 if ( matched.contains( dupeKey ) )
142 {
143 // duplicate
144 discardedCount++;
145 if ( dupesSink )
146 {
147 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
148 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral( "DUPLICATES" ) ) );
149 }
150 }
151 else
152 {
153 // not duplicate
154 keptCount++;
155 matched.insert( dupeKey );
156 if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
157 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
158 }
159
160 feedback->setProgress( current * step );
161 current++;
162 }
163
164 QVariantMap outputs;
165 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), keptCount );
166 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), discardedCount );
167 outputs.insert( QStringLiteral( "OUTPUT" ), noDupeSinkId );
168 if ( dupesSink )
169 outputs.insert( QStringLiteral( "DUPLICATES" ), dupeSinkId );
170 return outputs;
171}
172
174
175
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Definition: qgsfeature.cpp:338
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
@ FlagSkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Base class for providing feedback from a processing algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
@ TypeVector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink has no geometry.
Definition: qgsprocessing.h:54
@ TypeVectorAnyGeometry
Any vector layer with geometry.
Definition: qgsprocessing.h:48
QList< int > QgsAttributeList
Definition: qgsfield.h:26
const QgsField & field
Definition: qgsfield.h:463