QGIS API Documentation  3.22.4-Białowieża (ce8e65e95e)
qgsalgorithmdetectdatasetchanges.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdetectdatasetchanges.cpp
3  -----------------------------------------
4  begin : December 2019
5  copyright : (C) 2019 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsvectorlayer.h"
20 #include "qgsgeometryengine.h"
21 
23 
24 QString QgsDetectVectorChangesAlgorithm::name() const
25 {
26  return QStringLiteral( "detectvectorchanges" );
27 }
28 
29 QString QgsDetectVectorChangesAlgorithm::displayName() const
30 {
31  return QObject::tr( "Detect dataset changes" );
32 }
33 
34 QStringList QgsDetectVectorChangesAlgorithm::tags() const
35 {
36  return QObject::tr( "added,dropped,new,deleted,features,geometries,difference,delta,revised,original,version" ).split( ',' );
37 }
38 
39 QString QgsDetectVectorChangesAlgorithm::group() const
40 {
41  return QObject::tr( "Vector general" );
42 }
43 
44 QString QgsDetectVectorChangesAlgorithm::groupId() const
45 {
46  return QStringLiteral( "vectorgeneral" );
47 }
48 
49 void QgsDetectVectorChangesAlgorithm::initAlgorithm( const QVariantMap & )
50 {
51  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "ORIGINAL" ), QObject::tr( "Original layer" ) ) );
52  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "REVISED" ), QObject::tr( "Revised layer" ) ) );
53 
54  std::unique_ptr< QgsProcessingParameterField > compareAttributesParam = std::make_unique< QgsProcessingParameterField >( QStringLiteral( "COMPARE_ATTRIBUTES" ),
55  QObject::tr( "Attributes to consider for match (or none to compare geometry only)" ), QVariant(),
56  QStringLiteral( "ORIGINAL" ), QgsProcessingParameterField::Any, true, true );
57  compareAttributesParam->setDefaultToAllFields( true );
58  addParameter( compareAttributesParam.release() );
59 
60  std::unique_ptr< QgsProcessingParameterDefinition > matchTypeParam = std::make_unique< QgsProcessingParameterEnum >( QStringLiteral( "MATCH_TYPE" ),
61  QObject::tr( "Geometry comparison behavior" ),
62  QStringList() << QObject::tr( "Exact Match" )
63  << QObject::tr( "Tolerant Match (Topological Equality)" ),
64  false, 1 );
65  matchTypeParam->setFlags( matchTypeParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( matchTypeParam.release() );
67 
68  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "UNCHANGED" ), QObject::tr( "Unchanged features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
69  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "ADDED" ), QObject::tr( "Added features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
70  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "DELETED" ), QObject::tr( "Deleted features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
71 
72  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "UNCHANGED_COUNT" ), QObject::tr( "Count of unchanged features" ) ) );
73  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "ADDED_COUNT" ), QObject::tr( "Count of features added in revised layer" ) ) );
74  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DELETED_COUNT" ), QObject::tr( "Count of features deleted from original layer" ) ) );
75 }
76 
77 QString QgsDetectVectorChangesAlgorithm::shortHelpString() const
78 {
79  return QObject::tr( "This algorithm compares two vector layers, and determines which features are unchanged, added or deleted between "
80  "the two. It is designed for comparing two different versions of the same dataset.\n\n"
81  "When comparing features, the original and revised feature geometries will be compared against each other. Depending "
82  "on the Geometry Comparison Behavior setting, the comparison will either be made using an exact comparison (where "
83  "geometries must be an exact match for each other, including the order and count of vertices) or a topological "
84  "comparison only (where are geometries area considered equal if all of their component edges overlap. E.g. "
85  "lines with the same vertex locations but opposite direction will be considered equal by this method). If the topological "
86  "comparison is selected then any z or m values present in the geometries will not be compared.\n\n"
87  "By default, the algorithm compares all attributes from the original and revised features. If the Attributes to Consider for Match "
88  "parameter is changed, then only the selected attributes will be compared (e.g. allowing users to ignore a timestamp or ID field "
89  "which is expected to change between the revisions).\n\n"
90  "If any features in the original or revised layers do not have an associated geometry, then care must be taken to ensure "
91  "that these features have a unique set of attributes selected for comparison. If this condition is not met, warnings will be "
92  "raised and the resultant outputs may be misleading.\n\n"
93  "The algorithm outputs three layers, one containing all features which are considered to be unchanged between the revisions, "
94  "one containing features deleted from the original layer which are not present in the revised layer, and one containing features "
95  "add to the revised layer which are not present in the original layer." );
96 }
97 
98 QString QgsDetectVectorChangesAlgorithm::shortDescription() const
99 {
100  return QObject::tr( "Calculates features which are unchanged, added or deleted between two dataset versions." );
101 }
102 
103 QgsDetectVectorChangesAlgorithm *QgsDetectVectorChangesAlgorithm::createInstance() const
104 {
105  return new QgsDetectVectorChangesAlgorithm();
106 }
107 
108 bool QgsDetectVectorChangesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
109 {
110  mOriginal.reset( parameterAsSource( parameters, QStringLiteral( "ORIGINAL" ), context ) );
111  if ( !mOriginal )
112  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "ORIGINAL" ) ) );
113 
114  mRevised.reset( parameterAsSource( parameters, QStringLiteral( "REVISED" ), context ) );
115  if ( !mRevised )
116  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "REVISED" ) ) );
117 
118  mMatchType = static_cast< GeometryMatchType >( parameterAsEnum( parameters, QStringLiteral( "MATCH_TYPE" ), context ) );
119 
120  switch ( mMatchType )
121  {
122  case Exact:
123  if ( mOriginal->wkbType() != mRevised->wkbType() )
124  throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2). Consider using the \"Tolerant Match\" option instead." ).arg( QgsWkbTypes::displayString( mRevised->wkbType() ),
125  QgsWkbTypes::displayString( mOriginal->wkbType() ) ) );
126  break;
127 
128  case Topological:
129  if ( QgsWkbTypes::geometryType( mOriginal->wkbType() ) != QgsWkbTypes::geometryType( mRevised->wkbType() ) )
130  throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2)" ).arg( QgsWkbTypes::geometryDisplayString( QgsWkbTypes::geometryType( mRevised->wkbType() ) ),
131  QgsWkbTypes::geometryDisplayString( QgsWkbTypes::geometryType( mOriginal->wkbType() ) ) ) );
132  break;
133 
134  }
135 
136  if ( mOriginal->sourceCrs() != mRevised->sourceCrs() )
137  feedback->reportError( QObject::tr( "CRS for revised layer (%1) does not match the original layer (%2) - reprojection accuracy may affect geometry matching" ).arg( mOriginal->sourceCrs().userFriendlyIdentifier(),
138  mRevised->sourceCrs().userFriendlyIdentifier() ), false );
139 
140  mFieldsToCompare = parameterAsFields( parameters, QStringLiteral( "COMPARE_ATTRIBUTES" ), context );
141  mOriginalFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
142  mRevisedFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
143  QStringList missingOriginalFields;
144  QStringList missingRevisedFields;
145  for ( const QString &field : mFieldsToCompare )
146  {
147  const int originalIndex = mOriginal->fields().lookupField( field );
148  mOriginalFieldsToCompareIndices.append( originalIndex );
149  if ( originalIndex < 0 )
150  missingOriginalFields << field;
151 
152  const int revisedIndex = mRevised->fields().lookupField( field );
153  if ( revisedIndex < 0 )
154  missingRevisedFields << field;
155  mRevisedFieldsToCompareIndices.append( revisedIndex );
156  }
157 
158  if ( !missingOriginalFields.empty() )
159  throw QgsProcessingException( QObject::tr( "Original layer missing selected comparison attributes: %1" ).arg( missingOriginalFields.join( ',' ) ) );
160  if ( !missingRevisedFields.empty() )
161  throw QgsProcessingException( QObject::tr( "Revised layer missing selected comparison attributes: %1" ).arg( missingRevisedFields.join( ',' ) ) );
162 
163  return true;
164 }
165 
166 QVariantMap QgsDetectVectorChangesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
167 {
168  QString unchangedDestId;
169  std::unique_ptr< QgsFeatureSink > unchangedSink( parameterAsSink( parameters, QStringLiteral( "UNCHANGED" ), context, unchangedDestId, mOriginal->fields(),
170  mOriginal->wkbType(), mOriginal->sourceCrs() ) );
171  if ( !unchangedSink && parameters.value( QStringLiteral( "UNCHANGED" ) ).isValid() )
172  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "UNCHANGED" ) ) );
173 
174  QString addedDestId;
175  std::unique_ptr< QgsFeatureSink > addedSink( parameterAsSink( parameters, QStringLiteral( "ADDED" ), context, addedDestId, mRevised->fields(),
176  mRevised->wkbType(), mRevised->sourceCrs() ) );
177  if ( !addedSink && parameters.value( QStringLiteral( "ADDED" ) ).isValid() )
178  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "ADDED" ) ) );
179 
180  QString deletedDestId;
181  std::unique_ptr< QgsFeatureSink > deletedSink( parameterAsSink( parameters, QStringLiteral( "DELETED" ), context, deletedDestId, mOriginal->fields(),
182  mOriginal->wkbType(), mOriginal->sourceCrs() ) );
183  if ( !deletedSink && parameters.value( QStringLiteral( "DELETED" ) ).isValid() )
184  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "DELETED" ) ) );
185 
186  // first iteration: we loop through the entire original layer, building up a spatial index of ALL original geometries
187  // and collecting the original geometries themselves along with the attributes to compare
188  QgsFeatureRequest request;
189  request.setSubsetOfAttributes( mOriginalFieldsToCompareIndices );
190 
191  QgsFeatureIterator it = mOriginal->getFeatures( request );
192 
193  double step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
194  QHash< QgsFeatureId, QgsGeometry > originalGeometries;
195  QHash< QgsFeatureId, QgsAttributes > originalAttributes;
196  QHash< QgsAttributes, QgsFeatureId > originalNullGeometryAttributes;
197  long current = 0;
198 
199  QgsAttributes attrs;
200  attrs.resize( mFieldsToCompare.size() );
201 
202  const QgsSpatialIndex index( it, [&]( const QgsFeature & f )->bool
203  {
204  if ( feedback->isCanceled() )
205  return false;
206 
207  if ( f.hasGeometry() )
208  {
209  originalGeometries.insert( f.id(), f.geometry() );
210  }
211 
212  if ( !mFieldsToCompare.empty() )
213  {
214  int idx = 0;
215  for ( const int field : mOriginalFieldsToCompareIndices )
216  {
217  attrs[idx++] = f.attributes().at( field );
218  }
219  originalAttributes.insert( f.id(), attrs );
220  }
221 
222  if ( !f.hasGeometry() )
223  {
224  if ( originalNullGeometryAttributes.contains( attrs ) )
225  {
226  feedback->reportError( QObject::tr( "A non-unique set of comparison attributes was found for "
227  "one or more features without geometries - results may be misleading (features %1 and %2)" ).arg( f.id() ).arg( originalNullGeometryAttributes.value( attrs ) ) );
228  }
229  else
230  {
231  originalNullGeometryAttributes.insert( attrs, f.id() );
232  }
233  }
234 
235  // overall this loop takes about 10% of time
236  current++;
237  feedback->setProgress( 0.10 * current * step );
238  return true;
239  } );
240 
241  QSet<QgsFeatureId> unchangedOriginalIds;
242  QSet<QgsFeatureId> addedRevisedIds;
243  current = 0;
244 
245  // second iteration: we loop through ALL revised features, checking whether each is a match for a geometry from the
246  // original set. If so, check if the feature is unchanged. If there's no match with the original features, we mark it as an "added" feature
247  step = mRevised->featureCount() > 0 ? 100.0 / mRevised->featureCount() : 0;
248  QgsFeatureRequest revisedRequest = QgsFeatureRequest().setDestinationCrs( mOriginal->sourceCrs(), context.transformContext() );
249  revisedRequest.setSubsetOfAttributes( mRevisedFieldsToCompareIndices );
250  it = mRevised->getFeatures( revisedRequest );
251  QgsFeature revisedFeature;
252  while ( it.nextFeature( revisedFeature ) )
253  {
254  if ( feedback->isCanceled() )
255  break;
256 
257  int idx = 0;
258  for ( const int field : mRevisedFieldsToCompareIndices )
259  {
260  attrs[idx++] = revisedFeature.attributes().at( field );
261  }
262 
263  bool matched = false;
264 
265  if ( !revisedFeature.hasGeometry() )
266  {
267  if ( originalNullGeometryAttributes.contains( attrs ) )
268  {
269  // found a match for feature
270  unchangedOriginalIds.insert( originalNullGeometryAttributes.value( attrs ) );
271  matched = true;
272  }
273  }
274  else
275  {
276  // can we match this feature?
277  const QList<QgsFeatureId> candidates = index.intersects( revisedFeature.geometry().boundingBox() );
278 
279  // lazy evaluate -- there may be NO candidates!
280  QgsGeometry revised;
281 
282  for ( const QgsFeatureId candidateId : candidates )
283  {
284  if ( unchangedOriginalIds.contains( candidateId ) )
285  {
286  // already matched this original feature
287  continue;
288  }
289 
290  // attribute comparison is faster to do first, if desired
291  if ( !mFieldsToCompare.empty() )
292  {
293  if ( attrs != originalAttributes[ candidateId ] )
294  {
295  // attributes don't match, so candidates is not a match
296  continue;
297  }
298  }
299 
300  QgsGeometry original = originalGeometries.value( candidateId );
301  // lazy evaluation
302  if ( revised.isNull() )
303  {
304  revised = revisedFeature.geometry();
305  // drop z/m if not wanted for match
306  switch ( mMatchType )
307  {
308  case Topological:
309  {
310  revised.get()->dropMValue();
311  revised.get()->dropZValue();
312  original.get()->dropMValue();
313  original.get()->dropZValue();
314  break;
315  }
316 
317  case Exact:
318  break;
319  }
320  }
321 
322  bool geometryMatch = false;
323  switch ( mMatchType )
324  {
325  case Topological:
326  {
327  geometryMatch = revised.isGeosEqual( original );
328  break;
329  }
330 
331  case Exact:
332  geometryMatch = revised.equals( original );
333  break;
334  }
335 
336  if ( geometryMatch )
337  {
338  // candidate is a match for feature
339  unchangedOriginalIds.insert( candidateId );
340  matched = true;
341  break;
342  }
343  }
344  }
345 
346  if ( !matched )
347  {
348  // new feature
349  addedRevisedIds.insert( revisedFeature.id() );
350  }
351 
352  current++;
353  feedback->setProgress( 0.70 * current * step + 10 ); // takes about 70% of time
354  }
355 
356  // third iteration: iterate back over the original features, and direct them to the appropriate sink.
357  // If they were marked as unchanged during the second iteration, we put them in the unchanged sink. Otherwise
358  // they are placed into the deleted sink.
359  step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
360 
362  it = mOriginal->getFeatures( request );
363  current = 0;
364  long deleted = 0;
365  QgsFeature f;
366  while ( it.nextFeature( f ) )
367  {
368  if ( feedback->isCanceled() )
369  break;
370 
371  // use already fetched geometry
372  f.setGeometry( originalGeometries.value( f.id(), QgsGeometry() ) );
373 
374  if ( unchangedOriginalIds.contains( f.id() ) )
375  {
376  // unchanged
377  if ( unchangedSink )
378  {
379  if ( !unchangedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
380  throw QgsProcessingException( writeFeatureError( unchangedSink.get(), parameters, QStringLiteral( "UNCHANGED" ) ) );
381  }
382  }
383  else
384  {
385  // deleted feature
386  if ( deletedSink )
387  {
388  if ( !deletedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
389  throw QgsProcessingException( writeFeatureError( deletedSink.get(), parameters, QStringLiteral( "DELETED" ) ) );
390  }
391  deleted++;
392  }
393 
394  current++;
395  feedback->setProgress( 0.10 * current * step + 80 ); // takes about 10% of time
396  }
397 
398  // forth iteration: collect all added features and add them to the added sink
399  // NOTE: while we could potentially do this as part of the second iteration and save some time, we instead
400  // do this here using a brand new request because the second iteration
401  // is fetching reprojected features and we ideally want geometries from the revised layer's actual CRS only here!
402  // also, the second iteration is only fetching the actual attributes used in the comparison, whereas we want
403  // to include all attributes in the "added" output
404  if ( addedSink )
405  {
406  step = addedRevisedIds.size() > 0 ? 100.0 / addedRevisedIds.size() : 0;
407  it = mRevised->getFeatures( QgsFeatureRequest().setFilterFids( addedRevisedIds ) );
408  current = 0;
409  while ( it.nextFeature( f ) )
410  {
411  if ( feedback->isCanceled() )
412  break;
413 
414  // added feature
415  if ( !addedSink->addFeature( f, QgsFeatureSink::FastInsert ) )
416  throw QgsProcessingException( writeFeatureError( addedSink.get(), parameters, QStringLiteral( "ADDED" ) ) );
417 
418  current++;
419  feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
420  }
421  }
422  feedback->setProgress( 100 );
423 
424  feedback->pushInfo( QObject::tr( "%1 features unchanged" ).arg( unchangedOriginalIds.size() ) );
425  feedback->pushInfo( QObject::tr( "%1 features added" ).arg( addedRevisedIds.size() ) );
426  feedback->pushInfo( QObject::tr( "%1 features deleted" ).arg( deleted ) );
427 
428  QVariantMap outputs;
429  outputs.insert( QStringLiteral( "UNCHANGED" ), unchangedDestId );
430  outputs.insert( QStringLiteral( "ADDED" ), addedDestId );
431  outputs.insert( QStringLiteral( "DELETED" ), deletedDestId );
432  outputs.insert( QStringLiteral( "UNCHANGED_COUNT" ), static_cast< long long >( unchangedOriginalIds.size() ) );
433  outputs.insert( QStringLiteral( "ADDED_COUNT" ), static_cast< long long >( addedRevisedIds.size() ) );
434  outputs.insert( QStringLiteral( "DELETED_COUNT" ), static_cast< long long >( deleted ) );
435 
436  return outputs;
437 }
438 
virtual bool dropMValue()=0
Drops any measure values which exist in the geometry.
virtual bool dropZValue()=0
Drops any z-dimensions which exist in the geometry.
A vector of attributes.
Definition: qgsattributes.h:58
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(QgsFeatureRequest::Flags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
QgsFeatureRequest & setDestinationCrs(const QgsCoordinateReferenceSystem &crs, const QgsCoordinateTransformContext &context)
Sets the destination crs for feature's geometries.
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QgsAttributes attributes
Definition: qgsfeature.h:65
QgsGeometry geometry
Definition: qgsfeature.h:67
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:223
void setGeometry(const QgsGeometry &geometry)
Set the feature's geometry.
Definition: qgsfeature.cpp:163
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:125
Q_GADGET bool isNull
Definition: qgsgeometry.h:127
QgsAbstractGeometry * get()
Returns a modifiable (non-const) reference to the underlying abstract geometry primitive.
bool equals(const QgsGeometry &geometry) const
Test if this geometry is exactly equal to another geometry.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
QgsCoordinateTransformContext transformContext() const
Returns the coordinate transform context.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A numeric output for processing algorithms.
@ FlagAdvanced
Parameter is an advanced parameter which should be hidden from users by default.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
@ TypeVectorAnyGeometry
Any vector layer with geometry.
Definition: qgsprocessing.h:48
A spatial index for QgsFeature objects.
static GeometryType geometryType(Type type) SIP_HOLDGIL
Returns the geometry type for a WKB type, e.g., both MultiPolygon and CurvePolygon would have a Polyg...
Definition: qgswkbtypes.h:968
static QString geometryDisplayString(GeometryType type) SIP_HOLDGIL
Returns a display string for a geometry type.
static QString displayString(Type type) SIP_HOLDGIL
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
qint64 QgsFeatureId
64 bit feature ids; negative numbers are used for uncommitted/newly added features
Definition: qgsfeatureid.h:28
const QgsField & field
Definition: qgsfield.h:463