QGIS API Documentation 3.99.0-Master (2fe06baccd8)
Loading...
Searching...
No Matches
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmremoveduplicatesbyattribute.cpp
3 ----------------------------------
4 begin : October 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
21
22QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
23{
24 return QStringLiteral( "removeduplicatesbyattribute" );
25}
26
27QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
28{
29 return QObject::tr( "Delete duplicates by attribute" );
30}
31
32QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
33{
34 return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
35}
36
37QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
38{
39 return QObject::tr( "Vector general" );
40}
41
42QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
43{
44 return QStringLiteral( "vectorgeneral" );
45}
46
47void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
48{
49 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
50 addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS" ), QObject::tr( "Field to match duplicates by" ), QVariant(), QStringLiteral( "INPUT" ), Qgis::ProcessingFieldParameterDataType::Any, true ) );
51
52 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Filtered (no duplicates)" ) ) );
53 QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Filtered (duplicates)" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
54 failOutput->setCreateByDefault( false );
55 addParameter( failOutput );
56
57 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
58 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
59}
60
61QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
62{
63 return QObject::tr( "This algorithm removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
64 "Optionally, these duplicate records can be saved to a separate output for analysis." );
65}
66
67QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
68{
69 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
70}
71
72QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
73{
74 return new QgsRemoveDuplicatesByAttributeAlgorithm();
75}
76
77QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
78{
79 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
80 if ( !source )
81 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
82
83 const QStringList fieldNames = parameterAsStrings( parameters, QStringLiteral( "FIELDS" ), context );
84
85 QgsAttributeList attributes;
86 for ( const QString &field : fieldNames )
87 {
88 const int index = source->fields().lookupField( field );
89 if ( index < 0 )
90 feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
91 else
92 attributes.append( index );
93 }
94 if ( attributes.isEmpty() )
95 throw QgsProcessingException( QObject::tr( "No input fields found" ) );
96
97
98 QString noDupeSinkId;
99 std::unique_ptr<QgsFeatureSink> noDupeSink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, noDupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
100 if ( !noDupeSink )
101 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
102
103 QString dupeSinkId;
104 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
105
106 const long count = source->featureCount();
107 const double step = count > 0 ? 100.0 / count : 1;
108 int current = 0;
109
110 long long keptCount = 0;
111 long long discardedCount = 0;
112
113 QSet<QVariantList> matched;
114
116 QgsFeature f;
117
118 QVariantList dupeKey;
119 dupeKey.reserve( attributes.size() );
120 for ( const int i : attributes )
121 {
122 ( void ) i;
123 dupeKey.append( QVariant() );
124 }
125
126 while ( it.nextFeature( f ) )
127 {
128 if ( feedback->isCanceled() )
129 {
130 break;
131 }
132
133 int i = 0;
134 for ( const int attr : attributes )
135 dupeKey[i++] = f.attribute( attr );
136
137 if ( matched.contains( dupeKey ) )
138 {
139 // duplicate
140 discardedCount++;
141 if ( dupesSink )
142 {
143 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
144 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral( "DUPLICATES" ) ) );
145 }
146 }
147 else
148 {
149 // not duplicate
150 keptCount++;
151 matched.insert( dupeKey );
152 if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
153 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
154 }
155
156 feedback->setProgress( current * step );
157 current++;
158 }
159
160 if ( noDupeSink )
161 noDupeSink->finalize();
162
163 QVariantMap outputs;
164 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), keptCount );
165 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), discardedCount );
166 outputs.insert( QStringLiteral( "OUTPUT" ), noDupeSinkId );
167 if ( dupesSink )
168 {
169 dupesSink->finalize();
170 outputs.insert( QStringLiteral( "DUPLICATES" ), dupeSinkId );
171 }
172 return outputs;
173}
174
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition qgis.h:3539
@ VectorAnyGeometry
Any vector layer with geometry.
Definition qgis.h:3533
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3711
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
QList< int > QgsAttributeList
Definition qgsfield.h:28