QGIS API Documentation 3.37.0-Master (fdefdf9c27f)
qgsalgorithmdetectdatasetchanges.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmdetectdatasetchanges.cpp
3 -----------------------------------------
4 begin : December 2019
5 copyright : (C) 2019 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include "qgsvectorlayer.h"
20#include "qgsspatialindex.h"
21
23
24QString QgsDetectVectorChangesAlgorithm::name() const
25{
26 return QStringLiteral( "detectvectorchanges" );
27}
28
29QString QgsDetectVectorChangesAlgorithm::displayName() const
30{
31 return QObject::tr( "Detect dataset changes" );
32}
33
34QStringList QgsDetectVectorChangesAlgorithm::tags() const
35{
36 return QObject::tr( "added,dropped,new,deleted,features,geometries,difference,delta,revised,original,version" ).split( ',' );
37}
38
39QString QgsDetectVectorChangesAlgorithm::group() const
40{
41 return QObject::tr( "Vector general" );
42}
43
44QString QgsDetectVectorChangesAlgorithm::groupId() const
45{
46 return QStringLiteral( "vectorgeneral" );
47}
48
49void QgsDetectVectorChangesAlgorithm::initAlgorithm( const QVariantMap & )
50{
51 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "ORIGINAL" ), QObject::tr( "Original layer" ) ) );
52 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "REVISED" ), QObject::tr( "Revised layer" ) ) );
53
54 std::unique_ptr< QgsProcessingParameterField > compareAttributesParam = std::make_unique< QgsProcessingParameterField >( QStringLiteral( "COMPARE_ATTRIBUTES" ),
55 QObject::tr( "Attributes to consider for match (or none to compare geometry only)" ), QVariant(),
56 QStringLiteral( "ORIGINAL" ), Qgis::ProcessingFieldParameterDataType::Any, true, true );
57 compareAttributesParam->setDefaultToAllFields( true );
58 addParameter( compareAttributesParam.release() );
59
60 std::unique_ptr< QgsProcessingParameterDefinition > matchTypeParam = std::make_unique< QgsProcessingParameterEnum >( QStringLiteral( "MATCH_TYPE" ),
61 QObject::tr( "Geometry comparison behavior" ),
62 QStringList() << QObject::tr( "Exact Match" )
63 << QObject::tr( "Tolerant Match (Topological Equality)" ),
64 false, 1 );
65 matchTypeParam->setFlags( matchTypeParam->flags() | Qgis::ProcessingParameterFlag::Advanced );
66 addParameter( matchTypeParam.release() );
67
68 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "UNCHANGED" ), QObject::tr( "Unchanged features" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true, true ) );
69 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "ADDED" ), QObject::tr( "Added features" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true, true ) );
70 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "DELETED" ), QObject::tr( "Deleted features" ), Qgis::ProcessingSourceType::VectorAnyGeometry, QVariant(), true, true ) );
71
72 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "UNCHANGED_COUNT" ), QObject::tr( "Count of unchanged features" ) ) );
73 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "ADDED_COUNT" ), QObject::tr( "Count of features added in revised layer" ) ) );
74 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DELETED_COUNT" ), QObject::tr( "Count of features deleted from original layer" ) ) );
75}
76
77QString QgsDetectVectorChangesAlgorithm::shortHelpString() const
78{
79 return QObject::tr( "This algorithm compares two vector layers, and determines which features are unchanged, added or deleted between "
80 "the two. It is designed for comparing two different versions of the same dataset.\n\n"
81 "When comparing features, the original and revised feature geometries will be compared against each other. Depending "
82 "on the Geometry Comparison Behavior setting, the comparison will either be made using an exact comparison (where "
83 "geometries must be an exact match for each other, including the order and count of vertices) or a topological "
84 "comparison only (where geometries are considered equal if all of their component edges overlap. E.g. "
85 "lines with the same vertex locations but opposite direction will be considered equal by this method). If the topological "
86 "comparison is selected then any z or m values present in the geometries will not be compared.\n\n"
87 "By default, the algorithm compares all attributes from the original and revised features. If the Attributes to Consider for Match "
88 "parameter is changed, then only the selected attributes will be compared (e.g. allowing users to ignore a timestamp or ID field "
89 "which is expected to change between the revisions).\n\n"
90 "If any features in the original or revised layers do not have an associated geometry, then care must be taken to ensure "
91 "that these features have a unique set of attributes selected for comparison. If this condition is not met, warnings will be "
92 "raised and the resultant outputs may be misleading.\n\n"
93 "The algorithm outputs three layers, one containing all features which are considered to be unchanged between the revisions, "
94 "one containing features deleted from the original layer which are not present in the revised layer, and one containing features "
95 "added to the revised layer which are not present in the original layer." );
96}
97
98QString QgsDetectVectorChangesAlgorithm::shortDescription() const
99{
100 return QObject::tr( "Calculates features which are unchanged, added or deleted between two dataset versions." );
101}
102
103QgsDetectVectorChangesAlgorithm *QgsDetectVectorChangesAlgorithm::createInstance() const
104{
105 return new QgsDetectVectorChangesAlgorithm();
106}
107
108bool QgsDetectVectorChangesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
109{
110 mOriginal.reset( parameterAsSource( parameters, QStringLiteral( "ORIGINAL" ), context ) );
111 if ( !mOriginal )
112 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "ORIGINAL" ) ) );
113
114 mRevised.reset( parameterAsSource( parameters, QStringLiteral( "REVISED" ), context ) );
115 if ( !mRevised )
116 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "REVISED" ) ) );
117
118 mMatchType = static_cast< GeometryMatchType >( parameterAsEnum( parameters, QStringLiteral( "MATCH_TYPE" ), context ) );
119
120 switch ( mMatchType )
121 {
122 case Exact:
123 if ( mOriginal->wkbType() != mRevised->wkbType() )
124 throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2). Consider using the \"Tolerant Match\" option instead." ).arg( QgsWkbTypes::displayString( mRevised->wkbType() ),
125 QgsWkbTypes::displayString( mOriginal->wkbType() ) ) );
126 break;
127
128 case Topological:
129 if ( QgsWkbTypes::geometryType( mOriginal->wkbType() ) != QgsWkbTypes::geometryType( mRevised->wkbType() ) )
130 throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2)" ).arg( QgsWkbTypes::geometryDisplayString( QgsWkbTypes::geometryType( mRevised->wkbType() ) ),
132 break;
133
134 }
135
136 if ( mOriginal->sourceCrs() != mRevised->sourceCrs() )
137 feedback->reportError( QObject::tr( "CRS for revised layer (%1) does not match the original layer (%2) - reprojection accuracy may affect geometry matching" ).arg( mOriginal->sourceCrs().userFriendlyIdentifier(),
138 mRevised->sourceCrs().userFriendlyIdentifier() ), false );
139
140 mFieldsToCompare = parameterAsStrings( parameters, QStringLiteral( "COMPARE_ATTRIBUTES" ), context );
141 mOriginalFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
142 mRevisedFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
143 QStringList missingOriginalFields;
144 QStringList missingRevisedFields;
145 for ( const QString &field : mFieldsToCompare )
146 {
147 const int originalIndex = mOriginal->fields().lookupField( field );
148 mOriginalFieldsToCompareIndices.append( originalIndex );
149 if ( originalIndex < 0 )
150 missingOriginalFields << field;
151
152 const int revisedIndex = mRevised->fields().lookupField( field );
153 if ( revisedIndex < 0 )
154 missingRevisedFields << field;
155 mRevisedFieldsToCompareIndices.append( revisedIndex );
156 }
157
158 if ( !missingOriginalFields.empty() )
159 throw QgsProcessingException( QObject::tr( "Original layer missing selected comparison attributes: %1" ).arg( missingOriginalFields.join( ',' ) ) );
160 if ( !missingRevisedFields.empty() )
161 throw QgsProcessingException( QObject::tr( "Revised layer missing selected comparison attributes: %1" ).arg( missingRevisedFields.join( ',' ) ) );
162
163 return true;
164}
165
166QVariantMap QgsDetectVectorChangesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
167{
168 QString unchangedDestId;
169 std::unique_ptr< QgsFeatureSink > unchangedSink( parameterAsSink( parameters, QStringLiteral( "UNCHANGED" ), context, unchangedDestId, mOriginal->fields(),
170 mOriginal->wkbType(), mOriginal->sourceCrs() ) );
171 if ( !unchangedSink && parameters.value( QStringLiteral( "UNCHANGED" ) ).isValid() )
172 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "UNCHANGED" ) ) );
173
174 QString addedDestId;
175 std::unique_ptr< QgsFeatureSink > addedSink( parameterAsSink( parameters, QStringLiteral( "ADDED" ), context, addedDestId, mRevised->fields(),
176 mRevised->wkbType(), mRevised->sourceCrs() ) );
177 if ( !addedSink && parameters.value( QStringLiteral( "ADDED" ) ).isValid() )
178 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "ADDED" ) ) );
179
180 QString deletedDestId;
181 std::unique_ptr< QgsFeatureSink > deletedSink( parameterAsSink( parameters, QStringLiteral( "DELETED" ), context, deletedDestId, mOriginal->fields(),
182 mOriginal->wkbType(), mOriginal->sourceCrs() ) );
183 if ( !deletedSink && parameters.value( QStringLiteral( "DELETED" ) ).isValid() )
184 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "DELETED" ) ) );
185
186 // first iteration: we loop through the entire original layer, building up a spatial index of ALL original geometries
187 // and collecting the original geometries themselves along with the attributes to compare
188 QgsFeatureRequest request;
189 request.setSubsetOfAttributes( mOriginalFieldsToCompareIndices );
190
191 QgsFeatureIterator it = mOriginal->getFeatures( request );
192
193 double step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
194 QHash< QgsFeatureId, QgsGeometry > originalGeometries;
195 QHash< QgsFeatureId, QgsAttributes > originalAttributes;
196 QHash< QgsAttributes, QgsFeatureId > originalNullGeometryAttributes;
197 long current = 0;
198
199 QgsAttributes attrs;
200 attrs.resize( mFieldsToCompare.size() );
201
202 const QgsSpatialIndex index( it, [&]( const QgsFeature & f )->bool
203 {
204 if ( feedback->isCanceled() )
205 return false;
206
207 if ( f.hasGeometry() )
208 {
209 originalGeometries.insert( f.id(), f.geometry() );
210 }
211
212 if ( !mFieldsToCompare.empty() )
213 {
214 int idx = 0;
215 for ( const int field : mOriginalFieldsToCompareIndices )
216 {
217 attrs[idx++] = f.attributes().at( field );
218 }
219 originalAttributes.insert( f.id(), attrs );
220 }
221
222 if ( !f.hasGeometry() )
223 {
224 if ( originalNullGeometryAttributes.contains( attrs ) )
225 {
226 feedback->reportError( QObject::tr( "A non-unique set of comparison attributes was found for "
227 "one or more features without geometries - results may be misleading (features %1 and %2)" ).arg( f.id() ).arg( originalNullGeometryAttributes.value( attrs ) ) );
228 }
229 else
230 {
231 originalNullGeometryAttributes.insert( attrs, f.id() );
232 }
233 }
234
235 // overall this loop takes about 10% of time
236 current++;
237 feedback->setProgress( 0.10 * current * step );
238 return true;
239 } );
240
241 QSet<QgsFeatureId> unchangedOriginalIds;
242 QSet<QgsFeatureId> addedRevisedIds;
243 current = 0;
244
245 // second iteration: we loop through ALL revised features, checking whether each is a match for a geometry from the
246 // original set. If so, check if the feature is unchanged. If there's no match with the original features, we mark it as an "added" feature
247 step = mRevised->featureCount() > 0 ? 100.0 / mRevised->featureCount() : 0;
248 QgsFeatureRequest revisedRequest = QgsFeatureRequest().setDestinationCrs( mOriginal->sourceCrs(), context.transformContext() );
249 revisedRequest.setSubsetOfAttributes( mRevisedFieldsToCompareIndices );
250 it = mRevised->getFeatures( revisedRequest );
251 QgsFeature revisedFeature;
252 while ( it.nextFeature( revisedFeature ) )
253 {
254 if ( feedback->isCanceled() )
255 break;
256
257 int idx = 0;
258 for ( const int field : mRevisedFieldsToCompareIndices )
259 {
260 attrs[idx++] = revisedFeature.attributes().at( field );
261 }
262
263 bool matched = false;
264
265 if ( !revisedFeature.hasGeometry() )
266 {
267 if ( originalNullGeometryAttributes.contains( attrs ) )
268 {
269 // found a match for feature
270 unchangedOriginalIds.insert( originalNullGeometryAttributes.value( attrs ) );
271 matched = true;
272 }
273 }
274 else
275 {
276 // can we match this feature?
277 const QList<QgsFeatureId> candidates = index.intersects( revisedFeature.geometry().boundingBox() );
278
279 // lazy evaluate -- there may be NO candidates!
280 QgsGeometry revised;
281
282 for ( const QgsFeatureId candidateId : candidates )
283 {
284 if ( unchangedOriginalIds.contains( candidateId ) )
285 {
286 // already matched this original feature
287 continue;
288 }
289
290 // attribute comparison is faster to do first, if desired
291 if ( !mFieldsToCompare.empty() )
292 {
293 if ( attrs != originalAttributes[ candidateId ] )
294 {
295 // attributes don't match, so candidates is not a match
296 continue;
297 }
298 }
299
300 QgsGeometry original = originalGeometries.value( candidateId );
301 // lazy evaluation
302 if ( revised.isNull() )
303 {
304 revised = revisedFeature.geometry();
305 // drop z/m if not wanted for match
306 switch ( mMatchType )
307 {
308 case Topological:
309 {
310 revised.get()->dropMValue();
311 revised.get()->dropZValue();
312 original.get()->dropMValue();
313 original.get()->dropZValue();
314 break;
315 }
316
317 case Exact:
318 break;
319 }
320 }
321
322 bool geometryMatch = false;
323 switch ( mMatchType )
324 {
325 case Topological:
326 {
327 geometryMatch = revised.isGeosEqual( original );
328 break;
329 }
330
331 case Exact:
332 geometryMatch = revised.equals( original );
333 break;
334 }
335
336 if ( geometryMatch )
337 {
338 // candidate is a match for feature
339 unchangedOriginalIds.insert( candidateId );
340 matched = true;
341 break;
342 }
343 }
344 }
345
346 if ( !matched )
347 {
348 // new feature
349 addedRevisedIds.insert( revisedFeature.id() );
350 }
351
352 current++;
353 feedback->setProgress( 0.70 * current * step + 10 ); // takes about 70% of time
354 }
355
356 // third iteration: iterate back over the original features, and direct them to the appropriate sink.
357 // If they were marked as unchanged during the second iteration, we put them in the unchanged sink. Otherwise
358 // they are placed into the deleted sink.
359 step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
360
362 it = mOriginal->getFeatures( request );
363 current = 0;
364 long deleted = 0;
365 QgsFeature f;
366 while ( it.nextFeature( f ) )
367 {
368 if ( feedback->isCanceled() )
369 break;
370
371 // use already fetched geometry
372 f.setGeometry( originalGeometries.value( f.id(), QgsGeometry() ) );
373
374 if ( unchangedOriginalIds.contains( f.id() ) )
375 {
376 // unchanged
377 if ( unchangedSink )
378 {
379 if ( !unchangedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
380 throw QgsProcessingException( writeFeatureError( unchangedSink.get(), parameters, QStringLiteral( "UNCHANGED" ) ) );
381 }
382 }
383 else
384 {
385 // deleted feature
386 if ( deletedSink )
387 {
388 if ( !deletedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
389 throw QgsProcessingException( writeFeatureError( deletedSink.get(), parameters, QStringLiteral( "DELETED" ) ) );
390 }
391 deleted++;
392 }
393
394 current++;
395 feedback->setProgress( 0.10 * current * step + 80 ); // takes about 10% of time
396 }
397
398 // forth iteration: collect all added features and add them to the added sink
399 // NOTE: while we could potentially do this as part of the second iteration and save some time, we instead
400 // do this here using a brand new request because the second iteration
401 // is fetching reprojected features and we ideally want geometries from the revised layer's actual CRS only here!
402 // also, the second iteration is only fetching the actual attributes used in the comparison, whereas we want
403 // to include all attributes in the "added" output
404 if ( addedSink )
405 {
406 step = addedRevisedIds.size() > 0 ? 100.0 / addedRevisedIds.size() : 0;
407 it = mRevised->getFeatures( QgsFeatureRequest().setFilterFids( addedRevisedIds ) );
408 current = 0;
409 while ( it.nextFeature( f ) )
410 {
411 if ( feedback->isCanceled() )
412 break;
413
414 // added feature
415 if ( !addedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
416 throw QgsProcessingException( writeFeatureError( addedSink.get(), parameters, QStringLiteral( "ADDED" ) ) );
417
418 current++;
419 feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
420 }
421 }
422 feedback->setProgress( 100 );
423
424 feedback->pushInfo( QObject::tr( "%n feature(s) unchanged", nullptr, unchangedOriginalIds.size() ) );
425 feedback->pushInfo( QObject::tr( "%n feature(s) added", nullptr, addedRevisedIds.size() ) );
426 feedback->pushInfo( QObject::tr( "%n feature(s) deleted", nullptr, deleted ) );
427
428 QVariantMap outputs;
429 outputs.insert( QStringLiteral( "UNCHANGED" ), unchangedDestId );
430 outputs.insert( QStringLiteral( "ADDED" ), addedDestId );
431 outputs.insert( QStringLiteral( "DELETED" ), deletedDestId );
432 outputs.insert( QStringLiteral( "UNCHANGED_COUNT" ), static_cast< long long >( unchangedOriginalIds.size() ) );
433 outputs.insert( QStringLiteral( "ADDED_COUNT" ), static_cast< long long >( addedRevisedIds.size() ) );
434 outputs.insert( QStringLiteral( "DELETED_COUNT" ), static_cast< long long >( deleted ) );
435
436 return outputs;
437}
438
@ VectorAnyGeometry
Any vector layer with geometry.
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ Advanced
Parameter is an advanced parameter which should be hidden from users by default.
virtual bool dropMValue()=0
Drops any measure values which exist in the geometry.
virtual bool dropZValue()=0
Drops any z-dimensions which exist in the geometry.
A vector of attributes.
Definition: qgsattributes.h:59
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
QgsFeatureRequest & setDestinationCrs(const QgsCoordinateReferenceSystem &crs, const QgsCoordinateTransformContext &context)
Sets the destination crs for feature's geometries.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QgsAttributes attributes
Definition: qgsfeature.h:65
QgsGeometry geometry
Definition: qgsfeature.h:67
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:230
void setGeometry(const QgsGeometry &geometry)
Set the feature's geometry.
Definition: qgsfeature.cpp:167
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:61
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:162
Q_GADGET bool isNull
Definition: qgsgeometry.h:164
QgsAbstractGeometry * get()
Returns a modifiable (non-const) reference to the underlying abstract geometry primitive.
bool equals(const QgsGeometry &geometry) const
Test if this geometry is exactly equal to another geometry.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
QgsCoordinateTransformContext transformContext() const
Returns the coordinate transform context.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
static Qgis::GeometryType geometryType(Qgis::WkbType type)
Returns the geometry type for a WKB type, e.g., both MultiPolygon and CurvePolygon would have a Polyg...
Definition: qgswkbtypes.h:862
static QString displayString(Qgis::WkbType type)
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
static QString geometryDisplayString(Qgis::GeometryType type)
Returns a display string for a geometry type.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
Definition: qgsfeatureid.h:28