QGIS API Documentation  3.26.3-Buenos Aires (65e4edfdad)
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmremoveduplicatesbyattribute.cpp
3  ----------------------------------
4  begin : October 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 
21 
22 QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
23 {
24  return QStringLiteral( "removeduplicatesbyattribute" );
25 }
26 
27 QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
28 {
29  return QObject::tr( "Delete duplicates by attribute" );
30 }
31 
32 QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
33 {
34  return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
35 }
36 
37 QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
38 {
39  return QObject::tr( "Vector general" );
40 }
41 
42 QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
43 {
44  return QStringLiteral( "vectorgeneral" );
45 }
46 
47 void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
48 {
49  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ),
50  QList< int >() << QgsProcessing::TypeVector ) );
51  addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS" ), QObject::tr( "Field to match duplicates by" ), QVariant(), QStringLiteral( "INPUT" ), QgsProcessingParameterField::Any, true ) );
52 
53  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Filtered (no duplicates)" ) ) );
54  QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Filtered (duplicates)" ),
55  QgsProcessing::TypeVectorAnyGeometry, QVariant(), true );
56  failOutput->setCreateByDefault( false );
57  addParameter( failOutput );
58 
59  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
60  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
61 }
62 
63 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
64 {
65  return QObject::tr( "Removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
66  "Optionally, these duplicate records can be saved to a separate output for analysis." );
67 }
68 
69 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
70 {
71  return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
72 }
73 
74 QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
75 {
76  return new QgsRemoveDuplicatesByAttributeAlgorithm();
77 }
78 
79 QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
80 {
81  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
82  if ( !source )
83  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
84 
85  const QStringList fieldNames = parameterAsFields( parameters, QStringLiteral( "FIELDS" ), context );
86 
87  QgsAttributeList attributes;
88  for ( const QString &field : fieldNames )
89  {
90  const int index = source->fields().lookupField( field );
91  if ( index < 0 )
92  feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
93  else
94  attributes.append( index );
95  }
96  if ( attributes.isEmpty() )
97  throw QgsProcessingException( QObject::tr( "No input fields found" ) );
98 
99 
100  QString noDupeSinkId;
101  std::unique_ptr< QgsFeatureSink > noDupeSink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, noDupeSinkId, source->fields(),
102  source->wkbType(), source->sourceCrs() ) );
103  if ( !noDupeSink )
104  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
105 
106  QString dupeSinkId;
107  std::unique_ptr< QgsFeatureSink > dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupeSinkId, source->fields(),
108  source->wkbType(), source->sourceCrs() ) );
109 
110  const long count = source->featureCount();
111  const double step = count > 0 ? 100.0 / count : 1;
112  int current = 0;
113 
114  long long keptCount = 0;
115  long long discardedCount = 0;
116 
117  QSet< QVariantList > matched;
118 
120  QgsFeature f;
121 
122  QVariantList dupeKey;
123  dupeKey.reserve( attributes.size() );
124  for ( const int i : attributes )
125  {
126  ( void )i;
127  dupeKey.append( QVariant() );
128  }
129 
130  while ( it.nextFeature( f ) )
131  {
132  if ( feedback->isCanceled() )
133  {
134  break;
135  }
136 
137  int i = 0;
138  for ( const int attr : attributes )
139  dupeKey[i++] = f.attribute( attr );
140 
141  if ( matched.contains( dupeKey ) )
142  {
143  // duplicate
144  discardedCount++;
145  if ( dupesSink )
146  {
147  if ( !dupesSink->addFeature( f, QgsFeatureSink::FastInsert ) )
148  throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral( "DUPLICATES" ) ) );
149  }
150  }
151  else
152  {
153  // not duplicate
154  keptCount++;
155  matched.insert( dupeKey );
156  if ( !noDupeSink->addFeature( f, QgsFeatureSink::FastInsert ) )
157  throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
158  }
159 
160  feedback->setProgress( current * step );
161  current++;
162  }
163 
164  QVariantMap outputs;
165  outputs.insert( QStringLiteral( "RETAINED_COUNT" ), keptCount );
166  outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), discardedCount );
167  outputs.insert( QStringLiteral( "OUTPUT" ), noDupeSinkId );
168  if ( dupesSink )
169  outputs.insert( QStringLiteral( "DUPLICATES" ), dupeSinkId );
170  return outputs;
171 }
172 
174 
175 
QgsFeedback::setProgress
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:76
QgsProcessingFeedback
Base class for providing feedback from a processing algorithm.
Definition: qgsprocessingfeedback.h:37
QgsProcessingFeedback::reportError
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
Definition: qgsprocessingfeedback.cpp:59
QgsProcessingDestinationParameter::setCreateByDefault
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
Definition: qgsprocessingparameters.cpp:6803
QgsFeedback::isCanceled
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:67
QgsProcessingFeatureSource::FlagSkipGeometryValidityChecks
@ FlagSkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition: qgsprocessingutils.h:584
QgsProcessingParameterFeatureSource
An input feature source (such as vector layers) parameter for processing algorithms.
Definition: qgsprocessingparameters.h:3057
field
const QgsField & field
Definition: qgsfield.h:463
QgsAttributeList
QList< int > QgsAttributeList
Definition: qgsfield.h:26
QgsProcessingOutputNumber
A numeric output for processing algorithms.
Definition: qgsprocessingoutputs.h:312
QgsProcessingParameterFeatureSink
A feature sink output for processing algorithms.
Definition: qgsprocessingparameters.h:3219
QgsFeatureRequest
This class wraps a request for features to a vector layer (or directly its vector data provider).
Definition: qgsfeaturerequest.h:83
QgsProcessing::TypeVector
@ TypeVector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition: qgsprocessing.h:54
QgsProcessingContext
Contains information about the context in which a processing algorithm is executed.
Definition: qgsprocessingcontext.h:46
QgsProcessing::TypeVectorAnyGeometry
@ TypeVectorAnyGeometry
Any vector layer with geometry.
Definition: qgsprocessing.h:48
QgsFeature::attribute
QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Definition: qgsfeature.cpp:327
QgsFeatureIterator::nextFeature
bool nextFeature(QgsFeature &f)
Definition: qgsfeatureiterator.h:399
QgsFeature
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:55
QgsProcessingParameterField::Any
@ Any
Accepts any field.
Definition: qgsprocessingparameters.h:2947
qgsalgorithmremoveduplicatesbyattribute.h
QgsFeatureIterator
Wrapper for iterator of features from vector data provider or vector layer.
Definition: qgsfeatureiterator.h:289
QgsProcessingException
Custom exception class for processing related exceptions.
Definition: qgsexception.h:82
QgsProcessingParameterField
A vector layer or feature source field parameter for processing algorithms.
Definition: qgsprocessingparameters.h:2940
QgsFeatureSink::FastInsert
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Definition: qgsfeaturesink.h:70