QGIS API Documentation  3.8.0-Zanzibar (11aff65)
qgsalgorithmremoveduplicatesbyattribute.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmremoveduplicatesbyattribute.cpp
3  ----------------------------------
4  begin : October 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 
21 
22 QString QgsRemoveDuplicatesByAttributeAlgorithm::name() const
23 {
24  return QStringLiteral( "removeduplicatesbyattribute" );
25 }
26 
27 QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName() const
28 {
29  return QObject::tr( "Delete duplicates by attribute" );
30 }
31 
32 QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags() const
33 {
34  return QObject::tr( "drop,remove,field,value,same,filter" ).split( ',' );
35 }
36 
37 QString QgsRemoveDuplicatesByAttributeAlgorithm::group() const
38 {
39  return QObject::tr( "Vector general" );
40 }
41 
42 QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId() const
43 {
44  return QStringLiteral( "vectorgeneral" );
45 }
46 
47 void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm( const QVariantMap & )
48 {
49  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ),
50  QList< int >() << QgsProcessing::TypeVector ) );
51  addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS" ), QObject::tr( "Field to match duplicates by" ), QVariant(), QStringLiteral( "INPUT" ), QgsProcessingParameterField::Any, true ) );
52 
53  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Filtered (no duplicates)" ) ) );
54  QgsProcessingParameterFeatureSink *failOutput = new QgsProcessingParameterFeatureSink( QStringLiteral( "DUPLICATES" ), QObject::tr( "Filtered (duplicates)" ),
55  QgsProcessing::TypeVectorAnyGeometry, QVariant(), true );
56  failOutput->setCreateByDefault( false );
57  addParameter( failOutput );
58 
59  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
60  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
61 }
62 
63 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString() const
64 {
65  return QObject::tr( "Removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
66  "Optionally, these duplicate records can be saved to a separate output for analysis." );
67 }
68 
69 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription() const
70 {
71  return QObject::tr( "Removes duplicate rows by a field value (or multiple field values)." );
72 }
73 
74 QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance() const
75 {
76  return new QgsRemoveDuplicatesByAttributeAlgorithm();
77 }
78 
79 QVariantMap QgsRemoveDuplicatesByAttributeAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
80 {
81  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
82  if ( !source )
83  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
84 
85  const QStringList fieldNames = parameterAsFields( parameters, QStringLiteral( "FIELDS" ), context );
86 
87  QgsAttributeList attributes;
88  for ( const QString &field : fieldNames )
89  {
90  const int index = source->fields().lookupField( field );
91  if ( index < 0 )
92  feedback->reportError( QObject::tr( "Field %1 not found in INPUT layer, skipping" ).arg( field ) );
93  else
94  attributes.append( index );
95  }
96  if ( attributes.isEmpty() )
97  throw QgsProcessingException( QObject::tr( "No input fields found" ) );
98 
99 
100  QString noDupeSinkId;
101  std::unique_ptr< QgsFeatureSink > noDupeSink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, noDupeSinkId, source->fields(),
102  source->wkbType(), source->sourceCrs() ) );
103  if ( !noDupeSink )
104  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
105 
106  QString dupeSinkId;
107  std::unique_ptr< QgsFeatureSink > dupesSink( parameterAsSink( parameters, QStringLiteral( "DUPLICATES" ), context, dupeSinkId, source->fields(),
108  source->wkbType(), source->sourceCrs() ) );
109 
110  const long count = source->featureCount();
111  double step = count > 0 ? 100.0 / count : 1;
112  int current = 0;
113 
114  long long keptCount = 0;
115  long long discardedCount = 0;
116 
117  QSet< QVariantList > matched;
118 
120  QgsFeature f;
121 
122  QVariantList dupeKey;
123  dupeKey.reserve( attributes.size() );
124  for ( int i : attributes )
125  {
126  ( void )i;
127  dupeKey.append( QVariant() );
128  }
129 
130  while ( it.nextFeature( f ) )
131  {
132  if ( feedback->isCanceled() )
133  {
134  break;
135  }
136 
137  int i = 0;
138  for ( int attr : attributes )
139  dupeKey[i++] = f.attribute( attr );
140 
141  if ( matched.contains( dupeKey ) )
142  {
143  // duplicate
144  discardedCount++;
145  if ( dupesSink )
146  dupesSink->addFeature( f, QgsFeatureSink::FastInsert );
147  }
148  else
149  {
150  // not duplicate
151  keptCount++;
152  matched.insert( dupeKey );
153  noDupeSink->addFeature( f, QgsFeatureSink::FastInsert );
154  }
155 
156  feedback->setProgress( current * step );
157  current++;
158  }
159 
160  QVariantMap outputs;
161  outputs.insert( QStringLiteral( "RETAINED_COUNT" ), keptCount );
162  outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), discardedCount );
163  outputs.insert( QStringLiteral( "OUTPUT" ), noDupeSinkId );
164  if ( dupesSink )
165  outputs.insert( QStringLiteral( "DUPLICATES" ), dupeSinkId );
166  return outputs;
167 }
168 
170 
171 
Wrapper for iterator of features from vector data provider or vector layer.
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Base class for providing feedback from a processing algorithm.
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
A vector layer or feature source field parameter for processing algorithms.
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
A numeric output for processing algorithms.
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
Definition: qgsfeature.h:55
A feature sink output for processing algorithms.
This class wraps a request for features to a vector layer (or directly its vector data provider)...
Custom exception class for processing related exceptions.
Definition: qgsexception.h:82
void setCreateByDefault(bool createByDefault)
Sets whether the destination should be created by default.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
An input feature source (such as vector layers) parameter for processing algorithms.
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition: qgsprocessing.h:53
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
QList< int > QgsAttributeList
Definition: qgsfield.h:27
bool nextFeature(QgsFeature &f)
QVariant attribute(const QString &name) const
Lookup attribute value from attribute name.
Definition: qgsfeature.cpp:262
Contains information about the context in which a processing algorithm is executed.
Any vector layer with geometry.
Definition: qgsprocessing.h:47