20 #include <unordered_set>
24 QString QgsDbscanClusteringAlgorithm::name()
const
26 return QStringLiteral(
"dbscanclustering" );
29 QString QgsDbscanClusteringAlgorithm::displayName()
const
31 return QObject::tr(
"DBSCAN clustering" );
34 QString QgsDbscanClusteringAlgorithm::shortDescription()
const
36 return QObject::tr(
"Clusters point features using a density based scan algorithm." );
39 QStringList QgsDbscanClusteringAlgorithm::tags()
const
41 return QObject::tr(
"clustering,clusters,density,based,points" ).split(
',' );
44 QString QgsDbscanClusteringAlgorithm::group()
const
46 return QObject::tr(
"Vector analysis" );
49 QString QgsDbscanClusteringAlgorithm::groupId()
const
51 return QStringLiteral(
"vectoranalysis" );
54 void QgsDbscanClusteringAlgorithm::initAlgorithm(
const QVariantMap & )
61 QObject::tr(
"Maximum distance between clustered points" ), 1, QStringLiteral(
"INPUT" ),
false, 0 ) );
63 auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral(
"DBSCAN*" ),
64 QObject::tr(
"Treat border points as noise (DBSCAN*)" ),
false,
true );
66 addParameter( dbscanStarParam.release() );
68 auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral(
"FIELD_NAME" ),
69 QObject::tr(
"Cluster field name" ), QStringLiteral(
"CLUSTER_ID" ) );
71 addParameter( fieldNameParam.release() );
78 QString QgsDbscanClusteringAlgorithm::shortHelpString()
const
80 return QObject::tr(
"Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
81 "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
84 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance()
const
86 return new QgsDbscanClusteringAlgorithm();
89 struct KDBushDataEqualById
97 struct KDBushDataHashById
101 return std::hash< QgsFeatureId > {}( a.
id );
107 std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral(
"INPUT" ), context ) );
111 const std::size_t minSize =
static_cast< std::size_t
>( parameterAsInt( parameters, QStringLiteral(
"MIN_SIZE" ), context ) );
112 const double eps = parameterAsDouble( parameters, QStringLiteral(
"EPS" ), context );
113 const bool borderPointsAreNoise = parameterAsBoolean( parameters, QStringLiteral(
"DBSCAN*" ), context );
115 QgsFields outputFields = source->fields();
116 const QString clusterFieldName = parameterAsString( parameters, QStringLiteral(
"FIELD_NAME" ), context );
122 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral(
"OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
127 feedback->
pushInfo( QObject::tr(
"Building spatial index" ) );
130 return QVariantMap();
133 feedback->
pushInfo( QObject::tr(
"Analysing clusters" ) );
134 std::unordered_map< QgsFeatureId, int> idToCluster;
135 idToCluster.reserve( index.size() );
137 const long featureCount = source->featureCount();
139 int clusterCount = 0;
140 dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, clusterCount, feedback );
143 const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
144 features = source->getFeatures();
157 auto cluster = idToCluster.find( feat.
id() );
158 if ( cluster != idToCluster.end() )
160 attr << cluster->second;
171 outputs.insert( QStringLiteral(
"OUTPUT" ), dest );
172 outputs.insert( QStringLiteral(
"NUM_CLUSTERS" ), clusterCount );
177 void QgsDbscanClusteringAlgorithm::dbscan(
const std::size_t minSize,
179 const bool borderPointsAreNoise,
180 const long featureCount,
183 std::unordered_map< QgsFeatureId, int> &idToCluster,
187 const double step = featureCount > 0 ? 90.0 / featureCount : 1;
189 std::unordered_set< QgsFeatureId > visited;
190 visited.reserve( index.
size() );
209 if ( visited.find( feat.
id() ) != visited.end() )
226 std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
232 within.insert( data );
234 if ( within.size() < minSize )
237 visited.insert( feat.
id() );
247 idToCluster[ feat.
id() ] = clusterCount;
250 while ( !within.empty() )
258 within.erase( within.begin() );
260 if ( visited.find( j.
id ) != visited.end() )
266 visited.insert( j.
id );
272 std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
275 within2.insert( data );
277 if ( within2.size() >= minSize )
280 std::copy_if( within2.begin(),
282 std::inserter( within, within.end() ),
285 return visited.find( needle.id ) == visited.end();
288 if ( !borderPointsAreNoise || within2.size() >= minSize )
290 idToCluster[ j.
id ] = clusterCount;