QGIS API Documentation 4.1.0-Master (376402f9aeb)
Loading...
Searching...
No Matches
qgsalgorithmrandomextractwithinsubsets.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmrandomextractwithinsubsets.cpp
3 ---------------------
4 begin : January 2026
5 copyright : (C) 2026 by Alexander Bruy
6 email : alexander dot bruy at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
#include "qgsalgorithmrandomextractwithinsubsets.h"

#include <cmath>
#include <memory>
#include <random>
#include <vector>

#include "qgsvectorlayer.h"

#include <QString>
25
26using namespace Qt::StringLiterals;
27
29
30void QgsRandomExtractWithinSubsetsAlgorithmBase::sampleFeatureIds( QgsFeatureSource *source, const double value, const QString &fieldName, QgsProcessingFeedback *feedback )
31{
32 const int fieldIndex = source->fields().lookupField( fieldName );
33 if ( fieldIndex < 0 )
34 {
35 throw QgsProcessingException( QObject::tr( "Missing field '%1' in input layer" ).arg( fieldName ) );
36 }
37
38 QHash<QVariant, QgsFeatureIds> idsHash;
39
40 // Group IDs by attribute
41 QgsFeatureRequest request;
43 request.setSubsetOfAttributes( { fieldIndex } );
44 QgsFeatureIterator it = source->getFeatures( request );
45 double step = source->featureCount() > 0 ? 50.0 / static_cast<double>( source->featureCount() ) : 1;
46 long long i = 0;
47
48 QgsFeature f;
49 feedback->pushInfo( QObject::tr( "Building features subsets…" ) );
50 while ( it.nextFeature( f ) )
51 {
52 if ( feedback->isCanceled() )
53 {
54 return;
55 }
56 idsHash[f.attribute( fieldIndex )].insert( f.id() );
57
58 i++;
59 feedback->setProgress( static_cast<double>( i ) * step );
60 }
61
62 // initialize random engine
63 std::random_device randomDevice;
64 std::mt19937 mersenneTwister( randomDevice() );
65 std::uniform_int_distribution<std::size_t> fidsDistribution;
66
67 feedback->pushInfo( QObject::tr( "Randomly selecting features within subsets…" ) );
68 i = 0;
69 step = !idsHash.isEmpty() ? 50.0 / static_cast<double>( idsHash.size() ) : 1;
70
71 for ( auto hashIt = idsHash.constBegin(); hashIt != idsHash.constEnd(); ++hashIt )
72 {
73 if ( feedback->isCanceled() )
74 {
75 break;
76 }
77
78 const QgsFeatureIds &subsetIds = hashIt.value();
79 const long long total = subsetIds.size();
80 const long long count = mMethod == 0 ? static_cast<long long>( value ) : static_cast<long long>( std::ceil( static_cast<double>( total ) * value / 100 ) );
81
82 if ( count >= total )
83 {
84 feedback->reportError( QObject::tr( "Subset '%1' is smaller than requested number of features." ).arg( hashIt.key().toString() ) );
85 mSelectedFeatureIds.unite( subsetIds );
86 }
87 else
88 {
89 std::vector<QgsFeatureId> allSubsetIds( subsetIds.begin(), subsetIds.end() );
90 bool invertSelection = count > total / 2;
91 long long shuffledFeatureCount = invertSelection ? total - count : count;
92 std::size_t nb = allSubsetIds.size();
93
94 using difference_type = std::vector<QgsFeatureId>::difference_type;
95 auto cursor = allSubsetIds.begin();
96 for ( long long j = 0; j < shuffledFeatureCount; ++j )
97 {
98 if ( feedback->isCanceled() )
99 {
100 return;
101 }
102
103 fidsDistribution.param( std::uniform_int_distribution<std::size_t>::param_type( 0, nb - 1 ) );
104 std::swap( *cursor, *( cursor + static_cast<difference_type>( fidsDistribution( mersenneTwister ) ) ) );
105 ++cursor;
106 --nb;
107 }
108
109 if ( invertSelection )
110 {
111 for ( auto selectIt = cursor; selectIt != allSubsetIds.end(); ++selectIt )
112 {
113 mSelectedFeatureIds.insert( *selectIt );
114 }
115 }
116 else
117 {
118 for ( auto selectIt = allSubsetIds.begin(); selectIt != cursor; ++selectIt )
119 {
120 mSelectedFeatureIds.insert( *selectIt );
121 }
122 }
123 }
124
125 i++;
126 feedback->setProgress( 50.0 + ( static_cast<double>( i ) * step ) );
127 }
128}
129
130QString QgsRandomExtractWithinSubsetsAlgorithmBase::group() const
131{
132 return QObject::tr( "Vector selection" );
133}
134
135QString QgsRandomExtractWithinSubsetsAlgorithmBase::groupId() const
136{
137 return u"vectorselection"_s;
138}
139
140// Random extract within subsets algorithm
141
142QString QgsRandomExtractWithinSubsetsAlgorithm::name() const
143{
144 return u"randomextractwithinsubsets"_s;
145}
146
147QString QgsRandomExtractWithinSubsetsAlgorithm::displayName() const
148{
149 return QObject::tr( "Random extract within subsets" );
150}
151
152QStringList QgsRandomExtractWithinSubsetsAlgorithm::tags() const
153{
154 return QObject::tr( "extract,filter,random,number,percentage,subset" ).split( ',' );
155}
156
157QString QgsRandomExtractWithinSubsetsAlgorithm::shortDescription() const
158{
159 return QObject::tr( "Generates a new vector layer that contains only a subset of the features in the input layer." );
160}
161
162QString QgsRandomExtractWithinSubsetsAlgorithm::shortHelpString() const
163{
164 return QObject::tr(
165 "This algorithm takes a vector layer and generates a new one that "
166 "contains only a subset of the features in the input layer.\n\n"
167 "The subset is defined randomly, using a percentage or count value "
168 "to define the total number of features in the subset.\n\n"
169 "The percentage/count value is not applied to the whole layer, but "
170 "instead to each category. Categories are defined according to a "
171 "given attribute, which is also specified as an input parameter "
172 "for the algorithm."
173 );
174}
175
176Qgis::ProcessingAlgorithmDocumentationFlags QgsRandomExtractWithinSubsetsAlgorithm::documentationFlags() const
177{
179}
180
181QgsRandomExtractWithinSubsetsAlgorithm *QgsRandomExtractWithinSubsetsAlgorithm::createInstance() const
182{
183 return new QgsRandomExtractWithinSubsetsAlgorithm();
184}
185
186void QgsRandomExtractWithinSubsetsAlgorithm::initAlgorithm( const QVariantMap & )
187{
188 addParameter( new QgsProcessingParameterFeatureSource( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
189 addParameter( new QgsProcessingParameterField( u"FIELD"_s, QObject::tr( "ID field" ), QVariant(), u"INPUT"_s ) );
190 addParameter( new QgsProcessingParameterEnum( u"METHOD"_s, QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
191 addParameter( new QgsProcessingParameterNumber( u"NUMBER"_s, QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Double, 10, false, 0 ) );
192
193 addParameter( new QgsProcessingParameterFeatureSink( u"OUTPUT"_s, QObject::tr( "Extracted (random stratified)" ) ) );
194}
195
196QVariantMap QgsRandomExtractWithinSubsetsAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
197{
198 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, u"INPUT"_s, context ) );
199 if ( !source )
200 throw QgsProcessingException( invalidSourceError( parameters, u"INPUT"_s ) );
201
202 const QString fieldName = parameterAsString( parameters, u"FIELD"_s, context );
203 mMethod = parameterAsEnum( parameters, u"METHOD"_s, context );
204 const double number = parameterAsDouble( parameters, u"NUMBER"_s, context );
205
206 QString dest;
207 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, u"OUTPUT"_s, context, dest, source->fields(), source->wkbType(), source->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
208 if ( !sink )
209 throw QgsProcessingException( invalidSinkError( parameters, u"OUTPUT"_s ) );
210
211 const long long count = source->featureCount() > 0 ? source->featureCount() : 0;
212
213 if ( mMethod == 0 )
214 {
215 // number of features
216 if ( number > static_cast<double>( count ) )
217 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
218 }
219 else
220 {
221 // percentage of features
222 if ( number > 100 )
223 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
224 }
225
226 sampleFeatureIds( source.get(), number, fieldName, feedback );
227
228 feedback->pushInfo( QObject::tr( "Adding selected features" ) );
229 QgsFeature f;
231 double step = mSelectedFeatureIds.size() > 0 ? 100.0 / static_cast<double>( mSelectedFeatureIds.size() ) : 1;
232 long long i = 0;
233 while ( fit.nextFeature( f ) )
234 {
235 if ( feedback->isCanceled() )
236 return QVariantMap();
237
238 i++;
239 feedback->setProgress( static_cast<double>( i ) * step );
240
241 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
242 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, u"OUTPUT"_s ) );
243 }
244
245 sink->finalize();
246
247 QVariantMap outputs;
248 outputs.insert( u"OUTPUT"_s, dest );
249 return outputs;
250}
251
252// Random selection within subsets algorithm
253
254QString QgsRandomSelectionWithinSubsetsAlgorithm::name() const
255{
256 return u"randomselectionwithinsubsets"_s;
257}
258
259QString QgsRandomSelectionWithinSubsetsAlgorithm::displayName() const
260{
261 return QObject::tr( "Random selection within subsets" );
262}
263
264QStringList QgsRandomSelectionWithinSubsetsAlgorithm::tags() const
265{
266 return QObject::tr( "select,random,number,percentage,subset" ).split( ',' );
267}
268
269QString QgsRandomSelectionWithinSubsetsAlgorithm::shortDescription() const
270{
271 return QObject::tr( "Randomly selects features from a subset of a vector layer." );
272}
273
274QString QgsRandomSelectionWithinSubsetsAlgorithm::shortHelpString() const
275{
276 return QObject::tr(
277 "This algorithm takes a vector layer and selects a subset of its features. "
278 "No new layer is generated by this algorithm.\n\n"
279 "The subset is defined randomly, using a percentage or count value to define "
280 "the total number of features in the subset.\n\n"
281 "The percentage/count value is not applied to the whole layer, but instead to each category. "
282 "Categories are defined according to a given attribute, which is also specified "
283 "as an input parameter for the algorithm."
284 );
285}
286
287QgsRandomSelectionWithinSubsetsAlgorithm *QgsRandomSelectionWithinSubsetsAlgorithm::createInstance() const
288{
289 return new QgsRandomSelectionWithinSubsetsAlgorithm();
290}
291
292void QgsRandomSelectionWithinSubsetsAlgorithm::initAlgorithm( const QVariantMap & )
293{
294 addParameter( new QgsProcessingParameterVectorLayer( u"INPUT"_s, QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
295 addParameter( new QgsProcessingParameterField( u"FIELD"_s, QObject::tr( "ID field" ), QVariant(), u"INPUT"_s ) );
296 addParameter( new QgsProcessingParameterEnum( u"METHOD"_s, QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
297 addParameter( new QgsProcessingParameterNumber( u"NUMBER"_s, QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Double, 10, false, 0 ) );
298
299 addOutput( new QgsProcessingOutputVectorLayer( u"OUTPUT"_s, QObject::tr( "Selected (random)" ) ) );
300}
301
302QVariantMap QgsRandomSelectionWithinSubsetsAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
303{
304 mInput = parameters.value( u"INPUT"_s );
305 mTargetLayer = parameterAsVectorLayer( parameters, u"INPUT"_s, context );
306
307 if ( !mTargetLayer )
308 throw QgsProcessingException( QObject::tr( "Could not load source layer for INPUT." ) );
309
310 const QString fieldName = parameterAsString( parameters, u"FIELD"_s, context );
311 mMethod = parameterAsEnum( parameters, u"METHOD"_s, context );
312 const double number = parameterAsDouble( parameters, u"NUMBER"_s, context );
313 const long long count = mTargetLayer->featureCount() > 0 ? mTargetLayer->featureCount() : 0;
314
315 if ( mMethod == 0 )
316 {
317 // number of features
318 if ( number > static_cast<double>( count ) )
319 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
320 }
321 else
322 {
323 // percentage of features
324 if ( number > 100 )
325 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
326 }
327
328 sampleFeatureIds( mTargetLayer, number, fieldName, feedback );
329
330 return QVariantMap();
331}
332
333QVariantMap QgsRandomSelectionWithinSubsetsAlgorithm::postProcessAlgorithm( QgsProcessingContext &, QgsProcessingFeedback * )
334{
335 mTargetLayer->selectByIds( mSelectedFeatureIds );
336
337 QVariantMap outputs;
338 outputs.insert( u"OUTPUT"_s, mInput );
339 return outputs;
340}
341
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
Definition qgis.h:3720
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2329
@ RegeneratesPrimaryKey
Algorithm always drops any existing primary keys or FID values and regenerates them in outputs.
Definition qgis.h:3801
QFlags< ProcessingAlgorithmDocumentationFlag > ProcessingAlgorithmDocumentationFlags
Flags describing algorithm behavior for documentation purposes.
Definition qgis.h:3812
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3895
@ Double
Double/float values.
Definition qgis.h:3988
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
@ RegeneratePrimaryKey
This flag indicates, that a primary key field cannot be guaranteed to be unique and the sink should i...
An interface for objects which provide features via a getFeatures method.
virtual QgsFields fields() const =0
Returns the fields associated with features in the source.
virtual QgsCoordinateReferenceSystem sourceCrs() const =0
Returns the coordinate reference system for features in the source.
virtual Qgis::WkbType wkbType() const =0
Returns the geometry type for features returned by this source.
virtual QgsFeatureIterator getFeatures(const QgsFeatureRequest &request=QgsFeatureRequest()) const =0
Returns an iterator for the features in the source.
virtual long long featureCount() const =0
Returns the number of features contained in the source, or -1 if the feature count is unknown.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
QgsFeatureId id
Definition qgsfeature.h:68
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:56
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition qgsfeedback.h:65
Q_INVOKABLE int lookupField(const QString &fieldName) const
Looks up field's index from the field name.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
A vector layer output for processing algorithms.
An enum based parameter for processing algorithms, allowing for selection from predefined values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A vector layer or feature source field parameter for processing algorithms.
A numeric parameter for processing algorithms.
A vector layer (with or without geometry) parameter for processing algorithms.
QSet< QgsFeatureId > QgsFeatureIds