22 QString QgsRemoveDuplicatesByAttributeAlgorithm::name()
const
24 return QStringLiteral(
"removeduplicatesbyattribute" );
27 QString QgsRemoveDuplicatesByAttributeAlgorithm::displayName()
const
29 return QObject::tr(
"Delete duplicates by attribute" );
32 QStringList QgsRemoveDuplicatesByAttributeAlgorithm::tags()
const
34 return QObject::tr(
"drop,remove,field,value,same,filter" ).split(
',' );
37 QString QgsRemoveDuplicatesByAttributeAlgorithm::group()
const
39 return QObject::tr(
"Vector general" );
42 QString QgsRemoveDuplicatesByAttributeAlgorithm::groupId()
const
44 return QStringLiteral(
"vectorgeneral" );
// Registers the algorithm's parameters and outputs.
// The QVariantMap argument is unnamed, so it cannot be referenced in the body.
// NOTE(review): the statements between the signature and the
// `addParameter( failOutput )` call (including the definition of
// `failOutput`) are not visible in this excerpt.
47 void QgsRemoveDuplicatesByAttributeAlgorithm::initAlgorithm(
const QVariantMap & )
// Register the `failOutput` parameter definition built earlier (not shown here).
57 addParameter( failOutput );
// Numeric output named RETAINED_COUNT.
59 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"RETAINED_COUNT" ), QObject::tr(
"Count of retained records" ) ) );
// Numeric output named DUPLICATE_COUNT.
60 addOutput(
new QgsProcessingOutputNumber( QStringLiteral(
"DUPLICATE_COUNT" ), QObject::tr(
"Count of discarded duplicate records" ) ) );
63 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortHelpString()
const
65 return QObject::tr(
"Removes duplicate rows by a field value (or multiple field values). The first matching row will be retained, and duplicates will be discarded.\n\n"
66 "Optionally, these duplicate records can be saved to a separate output for analysis." );
69 QString QgsRemoveDuplicatesByAttributeAlgorithm::shortDescription()
const
71 return QObject::tr(
"Removes duplicate rows by a field value (or multiple field values)." );
74 QgsRemoveDuplicatesByAttributeAlgorithm *QgsRemoveDuplicatesByAttributeAlgorithm::createInstance()
const
76 return new QgsRemoveDuplicatesByAttributeAlgorithm();
// Statements of the algorithm's main processing routine.
// NOTE(review): the enclosing function signature, its braces and a number of
// intermediate lines (conditions, declarations, loop headers) are not visible
// in this excerpt. The comments below describe only the statements shown and
// mark anything inferred as an assumption.

// Resolve the INPUT parameter into a feature source.
81 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
// Field names supplied through the FIELDS parameter.
85 const QStringList fieldNames = parameterAsFields( parameters, QStringLiteral(
"FIELDS" ), context );
// For every requested field name, look up its index in the source's fields.
88 for (
const QString &
field : fieldNames )
90 const int index = source->fields().lookupField(
field );
// Report through `feedback` that the field is missing from INPUT and is being
// skipped (the condition guarding this call is not visible here).
92 feedback->
reportError( QObject::tr(
"Field %1 not found in INPUT layer, skipping" ).arg(
field ) );
// Collect the resolved field index.
94 attributes.append( index );
// Handling for the case where no field index was collected
// (the statement controlled by this `if` is not visible in this excerpt).
96 if ( attributes.isEmpty() )
// Sink for the OUTPUT parameter, created with the source's fields, geometry
// type and CRS. `noDupeSinkId` is passed to the call and later reported as
// the OUTPUT result — presumably filled in by parameterAsSink; confirm.
100 QString noDupeSinkId;
101 std::unique_ptr< QgsFeatureSink > noDupeSink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, noDupeSinkId, source->fields(),
102 source->wkbType(), source->sourceCrs() ) );
// Sink for the DUPLICATES parameter, created with the same fields, geometry
// type and CRS (the declaration of `dupeSinkId` is not visible here).
107 std::unique_ptr< QgsFeatureSink > dupesSink( parameterAsSink( parameters, QStringLiteral(
"DUPLICATES" ), context, dupeSinkId, source->fields(),
108 source->wkbType(), source->sourceCrs() ) );
// `step` is 100 divided by the feature count, or 1 when the count is not
// positive — presumably the per-feature progress increment; confirm.
// NOTE(review): `count` is a `long` while the counters below are `long long`;
// confirm featureCount()'s return type fits in `long` on all target platforms.
110 const long count = source->featureCount();
111 const double step = count > 0 ? 100.0 / count : 1;
// Running totals, reported below as RETAINED_COUNT and DUPLICATE_COUNT.
114 long long keptCount = 0;
115 long long discardedCount = 0;
// Set of attribute-value keys that have already been encountered.
117 QSet< QVariantList > matched;
// Key list with one entry per selected attribute, pre-filled with
// default-constructed QVariants so it has attributes.size() elements.
122 QVariantList dupeKey;
123 dupeKey.reserve( attributes.size() );
124 for (
const int i : attributes )
127 dupeKey.append( QVariant() );
// Iterates the selected attribute indexes (loop body not visible here;
// presumably it copies the current feature's values into dupeKey — confirm).
138 for (
const int attr : attributes )
// The key has been seen before, i.e. the current record is a duplicate.
141 if ( matched.contains( dupeKey ) )
// Raised with the write-error message for the DUPLICATES sink
// (the failing condition is not visible in this excerpt).
148 throw QgsProcessingException( writeFeatureError( dupesSink.get(), parameters, QStringLiteral(
"DUPLICATES" ) ) );
// Remember this key so later records with the same values are detected.
155 matched.insert( dupeKey );
// Raised with the write-error message for the OUTPUT sink
// (the failing condition is not visible in this excerpt).
157 throw QgsProcessingException( writeFeatureError( noDupeSink.get(), parameters, QStringLiteral(
"OUTPUT" ) ) );
// Populate the result map with both counters and the sink identifiers
// (the declaration of `outputs` is not visible here).
165 outputs.insert( QStringLiteral(
"RETAINED_COUNT" ), keptCount );
166 outputs.insert( QStringLiteral(
"DUPLICATE_COUNT" ), discardedCount );
167 outputs.insert( QStringLiteral(
"OUTPUT" ), noDupeSinkId );
// NOTE(review): the line preceding this insert is not visible; it may be
// conditional on the DUPLICATES sink existing — confirm.
169 outputs.insert( QStringLiteral(
"DUPLICATES" ), dupeSinkId );