QGIS API Documentation 3.37.0-Master (fdefdf9c27f)
qgsalgorithmrandomextract.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsalgorithmrandomextract.cpp
3 ---------------------
4 begin : December 2019
5 copyright : (C) 2019 by Alexander Bruy
6 email : alexander dot bruy at gmail dot com
7 ***************************************************************************/
8
9/***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
19#include <random>
20#include <functional>
21
23
24QString QgsRandomExtractAlgorithm::name() const
25{
26 return QStringLiteral( "randomextract" );
27}
28
29QString QgsRandomExtractAlgorithm::displayName() const
30{
31 return QObject::tr( "Random extract" );
32}
33
34QStringList QgsRandomExtractAlgorithm::tags() const
35{
36 return QObject::tr( "extract,filter,random,number,percentage" ).split( ',' );
37}
38
39QString QgsRandomExtractAlgorithm::group() const
40{
41 return QObject::tr( "Vector selection" );
42}
43
44QString QgsRandomExtractAlgorithm::groupId() const
45{
46 return QStringLiteral( "vectorselection" );
47}
48
49QString QgsRandomExtractAlgorithm::shortHelpString() const
50{
51 return QObject::tr( "This algorithm takes a vector layer and generates a new one that contains only a subset "
52 "of the features in the input layer.\n\n"
53 "The subset is defined randomly, using a percentage or count value to define the total number "
54 "of features in the subset." );
55}
56
57QgsRandomExtractAlgorithm *QgsRandomExtractAlgorithm::createInstance() const
58{
59 return new QgsRandomExtractAlgorithm();
60}
61
62void QgsRandomExtractAlgorithm::initAlgorithm( const QVariantMap & )
63{
64 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ),
65 QList< int >() << static_cast< int >( Qgis::ProcessingSourceType::Vector ) ) );
66 addParameter( new QgsProcessingParameterEnum( QStringLiteral( "METHOD" ), QObject::tr( "Method" ), QStringList() << QObject::tr( "Number of features" ) << QObject::tr( "Percentage of features" ), false, 0 ) );
67 addParameter( new QgsProcessingParameterNumber( QStringLiteral( "NUMBER" ), QObject::tr( "Number/percentage of features" ),
69
70 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Extracted (random)" ) ) );
71}
72
73QVariantMap QgsRandomExtractAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
74{
75 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
76 if ( !source )
77 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
78
79 QString dest;
80 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, source->fields(),
81 source->wkbType(), source->sourceCrs(), QgsFeatureSink::RegeneratePrimaryKey ) );
82 if ( !sink )
83 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
84
85 const int method = parameterAsEnum( parameters, QStringLiteral( "METHOD" ), context );
86 int number = parameterAsInt( parameters, QStringLiteral( "NUMBER" ), context );
87 const long count = source->featureCount();
88
89 if ( method == 0 )
90 {
91 // number of features
92 if ( number > count )
93 throw QgsProcessingException( QObject::tr( "Selected number is greater than feature count. Choose a lower value and try again." ) );
94 }
95 else
96 {
97 // percentage of features
98 if ( number > 100 )
99 throw QgsProcessingException( QObject::tr( "Percentage can't be greater than 100. Choose a lower value and try again." ) );
100
101 number = static_cast< int >( std::ceil( number * count / 100 ) );
102 }
103
104 // Build a list of all feature ids
105 QgsFeatureIterator fit = source->getFeatures( QgsFeatureRequest()
107 .setNoAttributes() );
108 std::vector< QgsFeatureId > allFeats;
109 allFeats.reserve( count );
110 QgsFeature f;
111 feedback->pushInfo( QObject::tr( "Building list of all features..." ) );
112 while ( fit.nextFeature( f ) )
113 {
114 if ( feedback->isCanceled() )
115 return QVariantMap();
116 allFeats.push_back( f.id() );
117 }
118 feedback->pushInfo( QObject::tr( "Done." ) );
119
120 // initialize random engine
121 std::random_device randomDevice;
122 std::mt19937 mersenneTwister( randomDevice() );
123 std::uniform_int_distribution<size_t> fidsDistribution;
124
125 // If the number of features to select is greater than half the total number of features
126 // we will instead randomly select features to *exclude* from the output layer
127 size_t actualFeatureCount = allFeats.size();
128 size_t shuffledFeatureCount = number;
129 bool invertSelection = static_cast< size_t>( number ) > actualFeatureCount / 2;
130 if ( invertSelection )
131 shuffledFeatureCount = actualFeatureCount - number;
132
133 size_t nb = actualFeatureCount;
134
135 // Shuffle <number> features at the start of the iterator
136 feedback->pushInfo( QObject::tr( "Randomly select %1 features" ).arg( number ) );
137 auto cursor = allFeats.begin();
138 using difference_type = std::vector<QgsFeatureId>::difference_type;
139 while ( shuffledFeatureCount-- )
140 {
141 if ( feedback->isCanceled() )
142 return QVariantMap();
143
144 // Update the distribution to match the number of unshuffled features
145 fidsDistribution.param( std::uniform_int_distribution<size_t>::param_type( 0, nb - 1 ) );
146 // Swap the current feature with a random one
147 std::swap( *cursor, *( cursor + static_cast<difference_type>( fidsDistribution( mersenneTwister ) ) ) );
148 // Move the cursor to the next feature
149 ++cursor;
150
151 // Decrement the number of unshuffled features
152 --nb;
153 }
154
155 // Insert the selected features into a QgsFeatureIds set
156 QgsFeatureIds selected;
157 if ( invertSelection )
158 for ( auto it = cursor; it != allFeats.end(); ++it )
159 selected.insert( *it );
160 else
161 for ( auto it = allFeats.begin(); it != cursor; ++it )
162 selected.insert( *it );
163
164 feedback->pushInfo( QObject::tr( "Adding selected features" ) );
165 fit = source->getFeatures( QgsFeatureRequest().setFilterFids( selected ), Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks );
166 while ( fit.nextFeature( f ) )
167 {
168 if ( feedback->isCanceled() )
169 return QVariantMap();
170
171 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
172 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
173 }
174
175 QVariantMap outputs;
176 outputs.insert( QStringLiteral( "OUTPUT" ), dest );
177 return outputs;
178}
179
@ Vector
Tables (i.e. vector layers with or without geometry). When used for a sink this indicates the sink ha...
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
@ SkipGeometryValidityChecks
Invalid geometry checks should always be skipped. This flag can be useful for algorithms which always...
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setNoAttributes()
Set that no attributes will be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
@ RegeneratePrimaryKey
This flag indicates, that a primary key field cannot be guaranteed to be unique and the sink should i...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
Q_GADGET QgsFeatureId id
Definition: qgsfeature.h:64
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:53
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
An enum based parameter for processing algorithms, allowing for selection from predefined values.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A numeric parameter for processing algorithms.
QSet< QgsFeatureId > QgsFeatureIds
Definition: qgsfeatureid.h:37