QGIS API Documentation 3.41.0-Master (cea29feecf2)
Loading...
Searching...
No Matches
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmremoveduplicatesbyattribute.cpp
3 ----------------------------------
4 begin : October 2018
5 copyright : (C) 2018 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
21
22QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
23{
24 return QStringLiteral( "removeduplicatesbyattribute" );
25}
26
27QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
28{
29 return QObject::tr( "Delete duplicates by attribute" );
30}
31
32QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
33{
34 return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
35}
36
37QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
38{
39 return QObject::tr( "Vector general" );
40}
41
42QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
43{
44 return QStringLiteral( "vectorgeneral" );
45}
46
47void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
48{
49 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
50 addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS" ), QObject::tr( "Field to match duplicates by" ), QVariant(), QStringLiteral( "INPUT" ), Qgis::ProcessingFieldParameterDataType::Any, true ) );
51
52 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Filtered (no duplicates)" ) ) );
53 QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Filtered (duplicates)" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true );
54 failOutput->setCreateByDefault( false );
55 addParameter( failOutput );
56
57 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
58 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
59}
60
61QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
62{
63 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
64 "Optionally, these duplicate records can be saved to a separate output for analysis." );
65}
66
67QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
68{
69 return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
70}
71
72QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
73{
74 return new QgsRemoveDuplicatesByAttributeAlgorithm();
75}
76
77QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
78{
79 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
80 if ( !source )
81 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
82
83 const QStringList fieldNames = parameterAsStrings( parameters, QStringLiteral( "FIELDS" ), context );
84
85 QgsAttributeList attributes;
86 for ( const QString &field : fieldNames )
87 {
88 const int index = source->fields().lookupField( field );
89 if ( index < 0 )
90 feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
91 else
92 attributes.append( index );
93 }
94 if ( attributes.isEmpty() )
95 throw QgsProcessingException( QObject::tr( "No input fields found" ) );
96
97
98 QString noDupeSinkId;
99 std::unique_ptr<QgsFeatureSink> noDupeSink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, noDupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
100 if ( !noDupeSink )
101 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
102
103 QString dupeSinkId;
104 std::unique_ptr<QgsFeatureSink> dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupeSinkId, source->fields(), source->wkbType(), source->sourceCrs() ) );
105
106 const long count = source->featureCount();
107 const double step = count > 0 ? 100.0 / count : 1;
108 int current = 0;
109
110 long long keptCount = 0;
111 long long discardedCount = 0;
112
113 QSet<QVariantList> matched;
114
116 QgsFeature f;
117
118 QVariantList dupeKey;
119 dupeKey.reserve( attributes.size() );
120 for ( const int i : attributes )
121 {
122 ( void ) i;
123 dupeKey.append( QVariant() );
124 }
125
126 while ( it.nextFeature( f ) )
127 {
128 if ( feedback->isCanceled() )
129 {
130 break;
131 }
132
133 int i = 0;
134 for ( const int attr : attributes )
135 dupeKey[i++] = f.attribute( attr );
136
137 if ( matched.contains( dupeKey ) )
138 {
139 // duplicate
140 discardedCount++;
141 if ( dupesSink )
142 {
143 if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
144 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral( "DUPLICATES" ) ) );
145 }
146 }
147 else
148 {
149 // not duplicate
150 keptCount++;
151 matched.insert( dupeKey );
152 if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
153 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
154 }
155
156 feedback->setProgress( current * step );
157 current++;
158 }
159
160 if ( noDupeSink )
161 noDupeSink->finalize();
162
163 QVariantMap outputs;
164 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), keptCount );
165 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), discardedCount );
166 outputs.insert( QStringLiteral( "OUTPUT" ), noDupeSinkId );
167 if ( dupesSink )
168 {
169 dupesSink->finalize();
170 outputs.insert( QStringLiteral( "DUPLICATES" ), dupeSinkId );
171 }
172 return outputs;
173}
174
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
@ VectorAnyGeometry
Any vector layer with geometry.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:61
Contains information about the context in which a processing algorithm is executed.
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
QList< int > QgsAttributeList
Definition qgsfield.h:27