QGIS API Documentation  3.27.0-Master (597e8eebd4)
qgsalgorithmjoinbynearest.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmjoinbynearest.cpp
3  ---------------------
4  begin : April 2017
5  copyright : (C) 2017 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsprocessingoutputs.h"
20 #include "qgslinestring.h"
21 
22 #include <algorithm>
23 
25 
26 QString QgsJoinByNearestAlgorithm::name() const
27 {
28  return QStringLiteral( "joinbynearest" );
29 }
30 
31 QString QgsJoinByNearestAlgorithm::displayName() const
32 {
33  return QObject::tr( "Join attributes by nearest" );
34 }
35 
36 QStringList QgsJoinByNearestAlgorithm::tags() const
37 {
38  return QObject::tr( "join,connect,attributes,values,fields,tables,proximity,closest,neighbour,neighbor,n-nearest,distance" ).split( ',' );
39 }
40 
41 QString QgsJoinByNearestAlgorithm::group() const
42 {
43  return QObject::tr( "Vector general" );
44 }
45 
46 QString QgsJoinByNearestAlgorithm::groupId() const
47 {
48  return QStringLiteral( "vectorgeneral" );
49 }
50 
51 void QgsJoinByNearestAlgorithm::initAlgorithm( const QVariantMap & )
52 {
53  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
54  QObject::tr( "Input layer" ) ) );
55  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT_2" ),
56  QObject::tr( "Input layer 2" ) ) );
57 
58  addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELDS_TO_COPY" ),
59  QObject::tr( "Layer 2 fields to copy (leave empty to copy all fields)" ),
60  QVariant(), QStringLiteral( "INPUT_2" ), QgsProcessingParameterField::Any,
61  true, true ) );
62 
63  addParameter( new QgsProcessingParameterBoolean( QStringLiteral( "DISCARD_NONMATCHING" ),
64  QObject::tr( "Discard records which could not be joined" ),
65  false ) );
66 
67  addParameter( new QgsProcessingParameterString( QStringLiteral( "PREFIX" ),
68  QObject::tr( "Joined field prefix" ), QVariant(), false, true ) );
69 
70  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "NEIGHBORS" ),
71  QObject::tr( "Maximum nearest neighbors" ), QgsProcessingParameterNumber::Integer, 1, false, 1 ) );
72 
73  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "MAX_DISTANCE" ),
74  QObject::tr( "Maximum distance" ), QVariant(), QStringLiteral( "INPUT" ), true, 0 ) );
75 
76  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Joined layer" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
77 
78  std::unique_ptr< QgsProcessingParameterFeatureSink > nonMatchingSink = std::make_unique< QgsProcessingParameterFeatureSink >(
79  QStringLiteral( "NON_MATCHING" ), QObject::tr( "Unjoinable features from first layer" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, false );
80  // TODO GUI doesn't support advanced outputs yet
81  //nonMatchingSink->setFlags(nonMatchingSink->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
82  addParameter( nonMatchingSink.release() );
83 
84  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "JOINED_COUNT" ), QObject::tr( "Number of joined features from input table" ) ) );
85  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "UNJOINABLE_COUNT" ), QObject::tr( "Number of unjoinable features from input table" ) ) );
86 }
87 
88 QString QgsJoinByNearestAlgorithm::shortHelpString() const
89 {
90  return QObject::tr( "This algorithm takes an input vector layer and creates a new vector layer that is an extended version of the "
91  "input one, with additional attributes in its attribute table.\n\n"
92  "The additional attributes and their values are taken from a second vector layer, where features are joined "
93  "by finding the closest features from each layer. By default only the single nearest feature is joined,"
94  "but optionally the join can use the n-nearest neighboring features instead. If multiple features are found "
95  "with identical distances these will all be returned (even if the total number of features exceeds the specified "
96  "maximum feature count).\n\n"
97  "If a maximum distance is specified, then only features which are closer than this distance "
98  "will be matched.\n\n"
99  "The output features will contain the selected attributes from the nearest feature, "
100  "along with new attributes for the distance to the near feature, the index of the feature, "
101  "and the coordinates of the closest point on the input feature (feature_x, feature_y) "
102  "to the matched nearest feature, and the coordinates of the closet point on the matched feature "
103  "(nearest_x, nearest_y).\n\n"
104  "This algorithm uses purely Cartesian calculations for distance, and does not consider "
105  "geodetic or ellipsoid properties when determining feature proximity." );
106 }
107 
108 QString QgsJoinByNearestAlgorithm::shortDescription() const
109 {
110  return QObject::tr( "Joins a layer to another layer, using the closest features (nearest neighbors)." );
111 }
112 
113 QgsJoinByNearestAlgorithm *QgsJoinByNearestAlgorithm::createInstance() const
114 {
115  return new QgsJoinByNearestAlgorithm();
116 }
117 
118 QVariantMap QgsJoinByNearestAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
119 {
120  const int neighbors = parameterAsInt( parameters, QStringLiteral( "NEIGHBORS" ), context );
121  const bool discardNonMatching = parameterAsBoolean( parameters, QStringLiteral( "DISCARD_NONMATCHING" ), context );
122  const double maxDistance = parameters.value( QStringLiteral( "MAX_DISTANCE" ) ).isValid() ? parameterAsDouble( parameters, QStringLiteral( "MAX_DISTANCE" ), context ) : std::numeric_limits< double >::quiet_NaN();
123  std::unique_ptr< QgsProcessingFeatureSource > input( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
124  if ( !input )
125  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
126 
127  std::unique_ptr< QgsProcessingFeatureSource > input2( parameterAsSource( parameters, QStringLiteral( "INPUT_2" ), context ) );
128  if ( !input2 )
129  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT_2" ) ) );
130 
131  const bool sameSourceAndTarget = parameters.value( QStringLiteral( "INPUT" ) ) == parameters.value( QStringLiteral( "INPUT_2" ) );
132 
133  const QString prefix = parameterAsString( parameters, QStringLiteral( "PREFIX" ), context );
134  const QStringList fieldsToCopy = parameterAsFields( parameters, QStringLiteral( "FIELDS_TO_COPY" ), context );
135 
136  QgsFields outFields2;
137  QgsAttributeList fields2Indices;
138  if ( fieldsToCopy.empty() )
139  {
140  outFields2 = input2->fields();
141  fields2Indices.reserve( outFields2.count() );
142  for ( int i = 0; i < outFields2.count(); ++i )
143  {
144  fields2Indices << i;
145  }
146  }
147  else
148  {
149  fields2Indices.reserve( fieldsToCopy.count() );
150  for ( const QString &field : fieldsToCopy )
151  {
152  const int index = input2->fields().lookupField( field );
153  if ( index >= 0 )
154  {
155  fields2Indices << index;
156  outFields2.append( input2->fields().at( index ) );
157  }
158  }
159  }
160 
161  if ( !prefix.isEmpty() )
162  {
163  for ( int i = 0; i < outFields2.count(); ++i )
164  {
165  outFields2.rename( i, prefix + outFields2[ i ].name() );
166  }
167  }
168 
169  const QgsAttributeList fields2Fetch = fields2Indices;
170 
171  QgsFields outFields = QgsProcessingUtils::combineFields( input->fields(), outFields2 );
172 
173  QgsFields resultFields;
174  resultFields.append( QgsField( QStringLiteral( "n" ), QVariant::Int ) );
175  resultFields.append( QgsField( QStringLiteral( "distance" ), QVariant::Double ) );
176  resultFields.append( QgsField( QStringLiteral( "feature_x" ), QVariant::Double ) );
177  resultFields.append( QgsField( QStringLiteral( "feature_y" ), QVariant::Double ) );
178  resultFields.append( QgsField( QStringLiteral( "nearest_x" ), QVariant::Double ) );
179  resultFields.append( QgsField( QStringLiteral( "nearest_y" ), QVariant::Double ) );
180  outFields = QgsProcessingUtils::combineFields( outFields, resultFields );
181 
182  QString dest;
183  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outFields,
184  input->wkbType(), input->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
185  if ( parameters.value( QStringLiteral( "OUTPUT" ) ).isValid() && !sink )
186  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
187 
188  QString destNonMatching1;
189  std::unique_ptr< QgsFeatureSink > sinkNonMatching1( parameterAsSink( parameters, QStringLiteral( "NON_MATCHING" ), context, destNonMatching1, input->fields(),
190  input->wkbType(), input->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
191  if ( parameters.value( QStringLiteral( "NON_MATCHING" ) ).isValid() && !sinkNonMatching1 )
192  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "NON_MATCHING" ) ) );
193 
194  // make spatial index
195  const QgsFeatureIterator f2 = input2->getFeatures( QgsFeatureRequest().setDestinationCrs( input->sourceCrs(), context.transformContext() ).setSubsetOfAttributes( fields2Fetch ) );
196  QHash< QgsFeatureId, QgsAttributes > input2AttributeCache;
197  double step = input2->featureCount() > 0 ? 50.0 / input2->featureCount() : 1;
198  int i = 0;
199  const QgsSpatialIndex index( f2, [&]( const QgsFeature & f )->bool
200  {
201  i++;
202  if ( feedback->isCanceled() )
203  return false;
204 
205  feedback->setProgress( i * step );
206 
207  if ( !f.hasGeometry() )
208  return true;
209 
210  // only keep selected attributes
211  QgsAttributes attributes;
212  for ( int j = 0; j < f.attributes().count(); ++j )
213  {
214  if ( ! fields2Indices.contains( j ) )
215  continue;
216  attributes << f.attribute( j );
217  }
218  input2AttributeCache.insert( f.id(), attributes );
219 
220  return true;
222 
223  QgsFeature f;
224 
225  // create extra null attributes for non-matched records (the +2 is for the "n" and "distance", and start/end x/y fields)
226  QgsAttributes nullMatch;
227  nullMatch.reserve( fields2Indices.size() + 6 );
228  for ( int i = 0; i < fields2Indices.count() + 6; ++i )
229  nullMatch << QVariant();
230 
231  long long joinedCount = 0;
232  long long unjoinedCount = 0;
233 
234  // Create output vector layer with additional attributes
235  step = input->featureCount() > 0 ? 50.0 / input->featureCount() : 1;
236  QgsFeatureIterator features = input->getFeatures();
237  i = 0;
238  while ( features.nextFeature( f ) )
239  {
240  i++;
241  if ( feedback->isCanceled() )
242  {
243  break;
244  }
245 
246  feedback->setProgress( 50 + i * step );
247 
248  if ( !f.hasGeometry() )
249  {
250  unjoinedCount++;
251  if ( sinkNonMatching1 )
252  {
253  if ( !sinkNonMatching1->addFeature( f, QgsFeatureSink::FastInsert ) )
254  throw QgsProcessingException( writeFeatureError( sinkNonMatching1.get(), parameters, QStringLiteral( "NON_MATCHING" ) ) );
255  }
256  if ( sink && !discardNonMatching )
257  {
258  QgsAttributes attr = f.attributes();
259  attr.append( nullMatch );
260  f.setAttributes( attr );
261  if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
262  throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
263  }
264  }
265  else
266  {
267  // note - if using same source as target, we have to get one extra neighbor, since the first match will be the input feature
268 
269  // if the user didn't specify a distance (isnan), then use 0 for nearestNeighbor() parameter
270  // if the user specified 0 exactly, then use the smallest positive double value instead
271  const double searchDistance = std::isnan( maxDistance ) ? 0 : std::max( std::numeric_limits<double>::min(), maxDistance );
272  const QList< QgsFeatureId > nearest = index.nearestNeighbor( f.geometry(), neighbors + ( sameSourceAndTarget ? 1 : 0 ), searchDistance );
273 
274  if ( nearest.count() > neighbors + ( sameSourceAndTarget ? 1 : 0 ) )
275  {
276  feedback->pushInfo( QObject::tr( "Multiple matching features found at same distance from search feature, found %n feature(s) instead of %1", nullptr, nearest.count() - ( sameSourceAndTarget ? 1 : 0 ) ).arg( neighbors ) );
277  }
278  QgsFeature out;
279  out.setGeometry( f.geometry() );
280  int j = 0;
281  for ( const QgsFeatureId id : nearest )
282  {
283  if ( sameSourceAndTarget && id == f.id() )
284  continue; // don't match to same feature if using a single input table
285  j++;
286  if ( sink )
287  {
288  QgsAttributes attr = f.attributes();
289  attr.append( input2AttributeCache.value( id ) );
290  attr.append( j );
291 
292  const QgsGeometry closestLine = f.geometry().shortestLine( index.geometry( id ) );
293  if ( const QgsLineString *line = qgsgeometry_cast< const QgsLineString *>( closestLine.constGet() ) )
294  {
295  attr.append( line->length() );
296  attr.append( line->startPoint().x() );
297  attr.append( line->startPoint().y() );
298  attr.append( line->endPoint().x() );
299  attr.append( line->endPoint().y() );
300  }
301  else
302  {
303  attr.append( QVariant() ); //distance
304  attr.append( QVariant() ); //start x
305  attr.append( QVariant() ); //start y
306  attr.append( QVariant() ); //end x
307  attr.append( QVariant() ); //end y
308  }
309  out.setAttributes( attr );
310  if ( !sink->addFeature( out, QgsFeatureSink::FastInsert ) )
311  throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
312  }
313  }
314  if ( j > 0 )
315  joinedCount++;
316  else
317  {
318  if ( sinkNonMatching1 )
319  {
320  if ( !sinkNonMatching1->addFeature( f, QgsFeatureSink::FastInsert ) )
321  throw QgsProcessingException( writeFeatureError( sinkNonMatching1.get(), parameters, QStringLiteral( "NON_MATCHING" ) ) );
322  }
323  if ( !discardNonMatching && sink )
324  {
325  QgsAttributes attr = f.attributes();
326  attr.append( nullMatch );
327  f.setAttributes( attr );
328  if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
329  throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
330  }
331  unjoinedCount++;
332  }
333  }
334  }
335 
336  QVariantMap outputs;
337  outputs.insert( QStringLiteral( "JOINED_COUNT" ), joinedCount );
338  outputs.insert( QStringLiteral( "UNJOINABLE_COUNT" ), unjoinedCount );
339  if ( sink )
340  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
341  if ( sinkNonMatching1 )
342  outputs.insert( QStringLiteral( "NON_MATCHING" ), destNonMatching1 );
343  return outputs;
344 }
345 
346 
A vector of attributes.
Definition: qgsattributes.h:58
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
@ RegeneratePrimaryKey
This flag indicates, that a primary key field cannot be guaranteed to be unique and the sink should i...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
QgsAttributes attributes
Definition: qgsfeature.h:65
void setAttributes(const QgsAttributes &attrs)
Sets the feature's attributes.
Definition: qgsfeature.cpp:153
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:223
QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Definition: qgsfeature.cpp:320
void setGeometry(const QgsGeometry &geometry)
Set the feature's geometry.
Definition: qgsfeature.cpp:163
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const SIP_HOLDGIL
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:51
Container of fields for a vector layer.
Definition: qgsfields.h:45
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false)
Definition: qgsfields.cpp:59
int count() const
Returns number of items.
Definition: qgsfields.cpp:133
int lookupField(const QString &fieldName) const
Looks up field's index from the field name.
Definition: qgsfields.cpp:349
bool rename(int fieldIdx, const QString &name)
Renames a name of field.
Definition: qgsfields.cpp:72
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:125
const QgsAbstractGeometry * constGet() const SIP_HOLDGIL
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
QgsGeometry shortestLine(const QgsGeometry &other) const
Returns the shortest line joining this geometry to another geometry.
Line string geometry type, with support for z-dimension and m-values.
Definition: qgslinestring.h:45
Contains information about the context in which a processing algorithm is executed.
QgsCoordinateTransformContext transformContext() const
Returns the coordinate transform context.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A boolean parameter for processing algorithms.
A double numeric parameter for distance values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
A numeric parameter for processing algorithms.
A string parameter for processing algorithms.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB, const QString &fieldsBPrefix=QString())
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
@ TypeVectorAnyGeometry
Any vector layer with geometry.
Definition: qgsprocessing.h:48
A spatial index for QgsFeature objects.
@ FlagStoreFeatureGeometries
Indicates that the spatial index should also store feature geometries. This requires more memory,...
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
Definition: qgsfeatureid.h:28
QList< int > QgsAttributeList
Definition: qgsfield.h:26
const QgsField & field
Definition: qgsfield.h:463