QGIS API Documentation 3.99.0-Master (2fe06baccd8)
Loading...
Searching...
No Matches
qgsalgorithmrandomextract.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmrandomextract.cpp
3 ---------------------
4 begin : December 2019
5 copyright : (C) 2019 by Alexander Bruy
6 email : alexander dot bruy at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19
20#include <functional>
21#include <random>
22
24
25QString QgsRandomExtractAlgorithm::name() const
26{
27 return QStringLiteral( "randomextract" );
28}
29
30QString QgsRandomExtractAlgorithm::displayName() const
31{
32 return QObject::tr( "Random extract" );
33}
34
35QStringList QgsRandomExtractAlgorithm::tags() const
36{
37 return QObject::tr( "extract,filter,random,number,percentage" ).split( ',' );
38}
39
40QString QgsRandomExtractAlgorithm::group() const
41{
42 return QObject::tr( "Vector selection" );
43}
44
45QString QgsRandomExtractAlgorithm::groupId() const
46{
47 return QStringLiteral( "vectorselection" );
48}
49
50QString QgsRandomExtractAlgorithm::shortDescription() const
51{
52 return QObject::tr( "Generates a vector layer that contains only a random subset of the features in an input layer." );
53}
54
55QString QgsRandomExtractAlgorithm::shortHelpString() const
56{
57 return QObject::tr( "This algorithm takes a vector layer and generates a new one that contains only a subset "
58 "of the features in the input layer.\n\n"
59 "The subset is defined randomly, using a percentage or count value to define the total number "
60 "of features in the subset." );
61}
62
63Qgis::ProcessingAlgorithmDocumentationFlags QgsRandomExtractAlgorithm::documentationFlags() const
64{
66}
67
68QgsRandomExtractAlgorithm *QgsRandomExtractAlgorithm::createInstance() const
69{
70 return new QgsRandomExtractAlgorithm();
71}
72
73void QgsRandomExtractAlgorithm::initAlgorithm( const QVariantMap & )
74{
75 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ), QList<int>() << static_cast<int>( Qgis::ProcessingSourceType::Vector ) ) );
76 addParameter( new QgsProcessingParameterEnum( QStringLiteral( "METHOD" ), QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
77 addParameter( new QgsProcessingParameterNumber( QStringLiteral( "NUMBER" ), QObject::tr( "Number/percentage of features" ), Qgis::ProcessingNumberParameterType::Integer, 10, false, 0 ) );
78
79 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Extracted (random)" ) ) );
80}
81
82QVariantMap QgsRandomExtractAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
83{
84 std::unique_ptr<QgsProcessingFeatureSource> source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
85 if ( !source )
86 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
87
88 QString dest;
89 std::unique_ptr<QgsFeatureSink> sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, source->fields(), source->wkbType(), source->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
90 if ( !sink )
91 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
92
93 const int method = parameterAsEnum( parameters, QStringLiteral( "METHOD" ), context );
94 int number = parameterAsInt( parameters, QStringLiteral( "NUMBER" ), context );
95 const long count = source->featureCount();
96
97 if ( method == 0 )
98 {
99 // number of features
100 if ( number > count )
101 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
102 }
103 else
104 {
105 // percentage of features
106 if ( number > 100 )
107 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
108
109 number = static_cast<int>( std::ceil( number * count / 100 ) );
110 }
111
112 // Build a list of all feature ids
113 QgsFeatureIterator fit = source->getFeatures( QgsFeatureRequest()
115 .setNoAttributes() );
116 std::vector<QgsFeatureId> allFeats;
117 allFeats.reserve( count );
118 QgsFeature f;
119 feedback->pushInfo( QObject::tr( "Building list of all features..." ) );
120 while ( fit.nextFeature( f ) )
121 {
122 if ( feedback->isCanceled() )
123 return QVariantMap();
124 allFeats.push_back( f.id() );
125 }
126 feedback->pushInfo( QObject::tr( "Done." ) );
127
128 // initialize random engine
129 std::random_device randomDevice;
130 std::mt19937 mersenneTwister( randomDevice() );
131 std::uniform_int_distribution<size_t> fidsDistribution;
132
133 // If the number of features to select is greater than half the total number of features
134 // we will instead randomly select features to *exclude* from the output layer
135 size_t actualFeatureCount = allFeats.size();
136 size_t shuffledFeatureCount = number;
137 bool invertSelection = static_cast<size_t>( number ) > actualFeatureCount / 2;
138 if ( invertSelection )
139 shuffledFeatureCount = actualFeatureCount - number;
140
141 size_t nb = actualFeatureCount;
142
143 // Shuffle <number> features at the start of the iterator
144 feedback->pushInfo( QObject::tr( "Randomly select %1 features" ).arg( number ) );
145 auto cursor = allFeats.begin();
146 using difference_type = std::vector<QgsFeatureId>::difference_type;
147 while ( shuffledFeatureCount-- )
148 {
149 if ( feedback->isCanceled() )
150 return QVariantMap();
151
152 // Update the distribution to match the number of unshuffled features
153 fidsDistribution.param( std::uniform_int_distribution<size_t>::param_type( 0, nb - 1 ) );
154 // Swap the current feature with a random one
155 std::swap( *cursor, *( cursor + static_cast<difference_type>( fidsDistribution( mersenneTwister ) ) ) );
156 // Move the cursor to the next feature
157 ++cursor;
158
159 // Decrement the number of unshuffled features
160 --nb;
161 }
162
163 // Insert the selected features into a QgsFeatureIds set
164 QgsFeatureIds selected;
165 if ( invertSelection )
166 for ( auto it = cursor; it != allFeats.end(); ++it )
167 selected.insert( *it );
168 else
169 for ( auto it = allFeats.begin(); it != cursor; ++it )
170 selected.insert( *it );
171
172 feedback->pushInfo( QObject::tr( "Adding selected features" ) );
173 fit = source->getFeatures( QgsFeatureRequest().setFilterFids( selected ), Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
174 while ( fit.nextFeature( f ) )
175 {
176 if ( feedback->isCanceled() )
177 return QVariantMap();
178
179 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
180 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
181 }
182
183 sink->finalize();
184
185 QVariantMap outputs;
186 outputs.insert( QStringLiteral( "OUTPUT" ), dest );
187 return outputs;
188}
189
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink has no geometry.
Definition qgis.h:3539
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Definition qgis.h:2196
@ RegeneratesPrimaryKey
Algorithm always drops any existing primary keys or FID values and regenerates them in outputs.
Definition qgis.h:3619
QFlags< ProcessingAlgorithmDocumentationFlag > ProcessingAlgorithmDocumentationFlags
Flags describing algorithm behavior for documentation purposes.
Definition qgis.h:3630
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Definition qgis.h:3711
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
Wraps a request for features to a vector layer (or directly its vector data provider).
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provider.
@ RegeneratePrimaryKey
This flag indicates, that a primary key field cannot be guaranteed to be unique and the sink should i...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field/values attributes.
Definition qgsfeature.h:58
QgsFeatureId id
Definition qgsfeature.h:66
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition qgsfeedback.h:53
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
An enum based parameter for processing algorithms, allowing for selection from predefined values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
QSet< QgsFeatureId > QgsFeatureIds