QGIS API Documentation 3.43.0-Master (e01d6d7c4c0)
qgsalgorithmrandomextract.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmrandomextract.cpp
3 ---------------------
4 begin : December 2019
5 copyright : (C) 2019 by Alexander Bruy
6 email : alexander dot bruy at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include <random>
20#include <functional>
21
23
24QString QgsRandomExtractAlgorithm::name() const
25{
26 return QStringLiteral( "randomextract" );
27}
28
29QString QgsRandomExtractAlgorithm::displayName() const
30{
31 return QObject::tr( "Random extract" );
32}
33
34QStringList QgsRandomExtractAlgorithm::tags() const
35{
36 return QObject::tr( "extract,filter,random,number,percentage" ).split( ',' );
37}
38
39QString QgsRandomExtractAlgorithm::group() const
40{
41 return QObject::tr( "Vector selection" );
42}
43
44QString QgsRandomExtractAlgorithm::groupId() const
45{
46 return QStringLiteral( "vectorselection" );
47}
48
49QString QgsRandomExtractAlgorithm::shortDescription() const
50{
51 return QObject::tr( "Generates a vector layer that contains only a random subset of the features in an input layer." );
52}
53
54QString QgsRandomExtractAlgorithm::shortHelpString() const
55{
56 return QObject::tr( "This algorithm takes a vector layer and generates a new one that contains only a subset "
57 "of the features in the input layer.\n\n"
58 "The subset is defined randomly, using a percentage or count value to define the total number "
59 "of features in the subset." );
60}
61
62Qgis::ProcessingAlgorithmDocumentationFlags QgsRandomExtractAlgorithm::documentationFlags() const
63{
65}
66
67QgsRandomExtractAlgorithm *QgsRandomExtractAlgorithm::createInstance() const
68{
69 return new QgsRandomExtractAlgorithm();
70}
71
72void QgsRandomExtractAlgorithm::initAlgorithm( const QVariantMap & )
73{
74 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
75 addParameter( new QgsProcessingParameterEnum( QStringLiteral( "METHOD" ), QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
76 addParameter( new QgsProcessingParameterNumber( QStringLiteral( "NUMBER" ), QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Integer, 10, false, 0 ) );
77
78 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Extracted (random)" ) ) );
79}
80
81QVariantMap QgsRandomExtractAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
82{
83 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
84 if ( !source )
85 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
86
87 QString dest;
88 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, source->fields(), source->wkbType(), source->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
89 if ( !sink )
90 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
91
92 const int method = parameterAsEnum( parameters, QStringLiteral( "METHOD" ), context );
93 int number = parameterAsInt( parameters, QStringLiteral( "NUMBER" ), context );
94 const long count = source->featureCount();
95
96 if ( method == 0 )
97 {
98 // number of features
99 if ( number > count )
100 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
101 }
102 else
103 {
104 // percentage of features
105 if ( number > 100 )
106 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
107
108 number = static_cast<int>( std::ceil( number * count / 100 ) );
109 }
110
111 // Build a list of all feature ids
112 QgsFeatureIterator fit = source->getFeatures( QgsFeatureRequest()
114 .setNoAttributes() );
115 std::vector<QgsFeatureId> allFeats;
116 allFeats.reserve( count );
117 QgsFeature f;
118 feedback->pushInfo( QObject::tr( "Building list of all features..." ) );
119 while ( fit.nextFeature( f ) )
120 {
121 if ( feedback->isCanceled() )
122 return QVariantMap();
123 allFeats.push_back( f.id() );
124 }
125 feedback->pushInfo( QObject::tr( "Done." ) );
126
127 // initialize random engine
128 std::random_device randomDevice;
129 std::mt19937 mersenneTwister( randomDevice() );
130 std::uniform_int_distribution<size_t> fidsDistribution;
131
132 // If the number of features to select is greater than half the total number of features
133 // we will instead randomly select features to *exclude* from the output layer
134 size_t actualFeatureCount = allFeats.size();
135 size_t shuffledFeatureCount = number;
136 bool invertSelection = static_cast<size_t>( number ) > actualFeatureCount / 2;
137 if ( invertSelection )
138 shuffledFeatureCount = actualFeatureCount - number;
139
140 size_t nb = actualFeatureCount;
141
142 // Shuffle <number> features at the start of the iterator
143 feedback->pushInfo( QObject::tr( "Randomly select %1 features" ).arg( number ) );
144 auto cursor = allFeats.begin();
145 using difference_type = std::vector<QgsFeatureId>::difference_type;
146 while ( shuffledFeatureCount-- )
147 {
148 if ( feedback->isCanceled() )
149 return QVariantMap();
150
151 // Update the distribution to match the number of unshuffled features
152 fidsDistribution.param( std::uniform_int_distribution<size_t>::param_type( 0, nb - 1 ) );
153 // Swap the current feature with a random one
154 std::swap( *cursor, *( cursor + static_cast<difference_type>( fidsDistribution( mersenneTwister ) ) ) );
155 // Move the cursor to the next feature
156 ++cursor;
157
158 // Decrement the number of unshuffled features
159 --nb;
160 }
161
162 // Insert the selected features into a QgsFeatureIds set
163 QgsFeatureIds selected;
164 if ( invertSelection )
165 for ( auto it = cursor; it != allFeats.end(); ++it )
166 selected.insert( *it );
167 else
168 for ( auto it = allFeats.begin(); it != cursor; ++it )
169 selected.insert( *it );
170
171 feedback->pushInfo( QObject::tr( "Adding selected features" ) );
173 while ( fit.nextFeature( f ) )
174 {
175 if ( feedback->isCanceled() )
176 return QVariantMap();
177
178 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
179 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
180 }
181
182 sink->finalize();
183
184 QVariantMap outputs;
185 outputs.insert( QStringLiteral( "OUTPUT" ), dest );
186 return outputs;
187}
188
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ RegeneratesPrimaryKey
Algorithm always drops any existing primary keys or FID values and regenerates them in outputs.
QFlags< ProcessingAlgorithmDocumentationFlag > ProcessingAlgorithmDocumentationFlags
Flags describing algorithm behavior for documentation purposes.
Definition qgis.h:3496
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetches the next feature and stores it in f; returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
QgsFeatureRequest & setNoAttributes()
Set that no attributes will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
@ RegeneratePrimaryKey
This flag indicates that a primary key field cannot be guaranteed to be unique and the sink should i...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:58
QgsFeatureId id
Definition qgsfeature.h:66
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
An enum based parameter for processing algorithms, allowing for selection from predefined values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
QSet< QgsFeatureId > QgsFeatureIds