QGIS API Documentation 4.1.0-Master (ca2ac17535b)
Loading...
Searching...
No Matches
qgsalgorithmrandomextract.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmrandomextract.cpp
3 ---------------------
4 begin : December 2019
5 copyright : (C) 2019 by Alexander Bruy
6 email : alexander dot bruy at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include <random>
21
22#include "qgsvectorlayer.h"
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
29
30void QgsRandomExtractSelectAlgorithmBase::sampleFeatureIds( QgsFeatureSource *source, const long long count, QgsProcessingFeedback *feedback )
31{
32 // Build a list of all feature ids
33 QgsFeatureIterator fit = source->getFeatures( QgsFeatureRequest().setFlags( Qgis::FeatureRequestFlag::NoGeometry ).setNoAttributes() );
34 std::vector<QgsFeatureId> allFeats;
35 allFeats.reserve( count );
36 QgsFeature f;
37 feedback->pushInfo( QObject::tr( "Building list of all features..." ) );
38 while ( fit.nextFeature( f ) )
39 {
40 if ( feedback->isCanceled() )
41 return;
42 allFeats.push_back( f.id() );
43 }
44
45 // initialize random engine
46 std::random_device randomDevice;
47 std::mt19937 mersenneTwister( randomDevice() );
48 std::uniform_int_distribution<size_t> fidsDistribution;
49
50 // If the number of features to select is greater than half the total number of features
51 // we will instead randomly select features to *exclude* from the output layer
52 const std::size_t actualFeatureCount = allFeats.size();
53 std::size_t shuffledFeatureCount = count;
54 bool invertSelection = static_cast<std::size_t>( count ) > actualFeatureCount / 2;
55 if ( invertSelection )
56 shuffledFeatureCount = actualFeatureCount - count;
57
58 std::size_t nb = actualFeatureCount;
59
60 // Shuffle <number> features at the start of the iterator
61 feedback->pushInfo( QObject::tr( "Randomly selecting %1 features" ).arg( count ) );
62 auto cursor = allFeats.begin();
63 using difference_type = std::vector<QgsFeatureId>::difference_type;
64 while ( shuffledFeatureCount-- )
65 {
66 if ( feedback->isCanceled() )
67 return;
68
69 // Update the distribution to match the number of unshuffled features
70 fidsDistribution.param( std::uniform_int_distribution<size_t>::param_type( 0, nb - 1 ) );
71 // Swap the current feature with a random one
72 std::swap( *cursor, *( cursor + static_cast<difference_type>( fidsDistribution( mersenneTwister ) ) ) );
73 // Move the cursor to the next feature
74 ++cursor;
75
76 // Decrement the number of unshuffled features
77 --nb;
78 }
79
80 // Insert the selected features into a QgsFeatureIds set
81 if ( invertSelection )
82 for ( auto it = cursor; it != allFeats.end(); ++it )
83 mSelectedFeatureIds.insert( *it );
84 else
85 for ( auto it = allFeats.begin(); it != cursor; ++it )
86 mSelectedFeatureIds.insert( *it );
87}
88
89QString QgsRandomExtractSelectAlgorithmBase::group() const
90{
91 return QObject::tr( "Vector selection" );
92}
93
94QString QgsRandomExtractSelectAlgorithmBase::groupId() const
95{
96 return u"vectorselection"_s;
97}
98
99// Random extract algorithm
100
101QString QgsRandomExtractAlgorithm::name() const
102{
103 return u"randomextract"_s;
104}
105
106QString QgsRandomExtractAlgorithm::displayName() const
107{
108 return QObject::tr( "Random extract" );
109}
110
111QStringList QgsRandomExtractAlgorithm::tags() const
112{
113 return QObject::tr( "extract,filter,random,number,percentage" ).split( ',' );
114}
115
116QString QgsRandomExtractAlgorithm::shortDescription() const
117{
118 return QObject::tr( "Generates a vector layer that contains only a random subset of the features in an input layer." );
119}
120
121QString QgsRandomExtractAlgorithm::shortHelpString() const
122{
123 return QObject::tr(
124 "This algorithm takes a vector layer and generates a new one that contains only a subset "
125 "of the features in the input layer.\n\n"
126 "The subset is defined randomly, using a percentage or count value to define the total number "
127 "of features in the subset."
128 );
129}
130
131Qgis::ProcessingAlgorithmDocumentationFlags QgsRandomExtractAlgorithm::documentationFlags() const
132{
134}
135
136QgsRandomExtractAlgorithm *QgsRandomExtractAlgorithm::createInstance() const
137{
138 return new QgsRandomExtractAlgorithm();
139}
140
141void QgsRandomExtractAlgorithm::initAlgorithm( const QVariantMap & )
142{
143 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
144 addParameter( new QgsProcessingParameterEnum( u"METHOD"_s, QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
145 addParameter( new QgsProcessingParameterNumber( u"NUMBER"_s, QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Double, 10, false, 0 ) );
146
147 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Extracted (random)" ) ) );
148}
149
150QVariantMap QgsRandomExtractAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
151{
152 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u"INPUT"_s, context ) );
153 if ( !source )
154 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
155
156 QString dest;
157 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, u"OUTPUT"_s, context, dest, source->fields(), source->wkbType(), source->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
158 if ( !sink )
159 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
160
161 const int method = parameterAsEnum( parameters, u"METHOD"_s, context );
162 double number = parameterAsDouble( parameters, u"NUMBER"_s, context );
163 const long long count = source->featureCount();
164
165 if ( method == 0 )
166 {
167 // number of features
168 if ( number > static_cast<double>( count ) )
169 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
170 }
171 else
172 {
173 // percentage of features
174 if ( number > 100 )
175 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
176
177 number = std::ceil( number * static_cast<double>( count ) / 100 );
178 }
179
180 sampleFeatureIds( source.get(), static_cast<long long>( number ), feedback );
181
182 feedback->pushInfo( QObject::tr( "Adding selected features" ) );
183 QgsFeature f;
185 while ( fit.nextFeature( f ) )
186 {
187 if ( feedback->isCanceled() )
188 return QVariantMap();
189
190 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
191 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, u"OUTPUT"_s ) );
192 else
193 feedback->featureAddedToSink( u"OUTPUT"_s );
194 }
195
196 sink->finalize();
197 feedback->featureSinkFinalized( u"OUTPUT"_s );
198
199 QVariantMap outputs;
200 outputs.insert( u"OUTPUT"_s, dest );
201 return outputs;
202}
203
204// Random selection algorithm
205
206QString QgsRandomSelectionAlgorithm::name() const
207{
208 return u"randomselection"_s;
209}
210
211QString QgsRandomSelectionAlgorithm::displayName() const
212{
213 return QObject::tr( "Random selection" );
214}
215
216QStringList QgsRandomSelectionAlgorithm::tags() const
217{
218 return QObject::tr( "select,random,number,percentage" ).split( ',' );
219}
220
221QString QgsRandomSelectionAlgorithm::shortDescription() const
222{
223 return QObject::tr( "Randomly selects features from a vector layer." );
224}
225
226QString QgsRandomSelectionAlgorithm::shortHelpString() const
227{
228 return QObject::tr(
229 "This algorithm takes a vector layer and selects a subset of its features. "
230 "No new layer is generated by this algorithm.\n\n"
231 "The subset is defined randomly, using a percentage or count value to define "
232 "the total number of features in the subset."
233 );
234}
235
236QgsRandomSelectionAlgorithm *QgsRandomSelectionAlgorithm::createInstance() const
237{
238 return new QgsRandomSelectionAlgorithm();
239}
240
241void QgsRandomSelectionAlgorithm::initAlgorithm( const QVariantMap & )
242{
243 addParameter( new QgsProcessingParameterVectorLayer( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
244 addParameter( new QgsProcessingParameterEnum( u"METHOD"_s, QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
245 addParameter( new QgsProcessingParameterNumber( u"NUMBER"_s, QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Double, 10, false, 0 ) );
246
247 addOutput( new QgsProcessingOutputVectorLayer( u"OUTPUT"_s, QObject::tr( "Selected (random)" ) ) );
248}
249
250QVariantMap QgsRandomSelectionAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
251{
252 mInput = parameters.value( u"INPUT"_s );
253 mTargetLayer = parameterAsVectorLayer( parameters, u"INPUT"_s, context );
254
255 if ( !mTargetLayer )
256 throw QgsProcessingException( QObject::tr( "Could not load source layer for INPUT." ) );
257
258 const int method = parameterAsEnum( parameters, u"METHOD"_s, context );
259 double number = parameterAsDouble( parameters, u"NUMBER"_s, context );
260 const long long count = mTargetLayer->featureCount();
261
262 if ( method == 0 )
263 {
264 // number of features
265 if ( number > static_cast<double>( count ) )
266 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
267 }
268 else
269 {
270 // percentage of features
271 if ( number > 100 )
272 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
273
274 number = std::ceil( number * static_cast<double>( count ) / 100 );
275 }
276
277 // Insert the selected features into a QgsFeatureIds set
278 sampleFeatureIds( mTargetLayer, static_cast<long long>( number ), feedback );
279
280 return QVariantMap();
281}
282
283QVariantMap QgsRandomSelectionAlgorithm::postProcessAlgorithm( QgsProcessingContext &, QgsProcessingFeedback * )
284{
285 mTargetLayer->selectByIds( mSelectedFeatureIds );
286
287 QVariantMap outputs;
288 outputs.insert( u"OUTPUT"_s, mInput );
289 return outputs;
290}
291
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition qgis.h:3720
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2329
@ RegeneratesPrimaryKey
Algorithm always drops any existing primary keys or FID values and regenerates them in outputs.
Definition qgis.h:3801
QFlags< ProcessingAlgorithmDocumentationFlag > ProcessingAlgorithmDocumentationFlags
Flags describing algorithm behavior for documentation purposes.
Definition qgis.h:3812
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3895
@ Double
Double/float values.
Definition qgis.h:3988
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
@ RegeneratePrimaryKey
This flag indicates, that a primary key field cannot be guaranteed to be unique and the sink should i...
An interface for objects which provide features via a getFeatures method.
virtual QgsFields fields() const =0
Returns the fields associated with features in the source.
virtual QgsCoordinateReferenceSystem sourceCrs() const =0
Returns the coordinate reference system for features in the source.
virtual Qgis::WkbType wkbType() const =0
Returns the geometry type for features returned by this source.
virtual QgsFeatureIterator getFeatures(const QgsFeatureRequest &request=QgsFeatureRequest()) const =0
Returns an iterator for the features in the source.
virtual long long featureCount() const =0
Returns the number of features contained in the source, or -1 if the feature count is unknown.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
QgsFeatureId id
Definition qgsfeature.h:63
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:56
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
void featureAddedToSink(const QString &output)
Reports that a feature was added to the sink associated with the specified algorithm output.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
void featureSinkFinalized(const QString &output)
Reports that a feature sink has been finalized.
A vector layer output for processing algorithms.
An enum based parameter for processing algorithms, allowing for selection from predefined values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
A vector layer (with or without geometry) parameter for processing algorithms.