QGIS API Documentation 4.1.0-Master (376402f9aeb)
Loading...
Searching...
No Matches
qgsarrowiterator.h
Go to the documentation of this file.
1/***************************************************************************
2 qgsarrowiterator.h
3 ---------------------
4 begin : November 2025
5 copyright : (C) 2025 by Dewey Dunnington
6 email : dewey at dunnington dot ca
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#ifndef QGSARROWITERATOR_H
17#define QGSARROWITERATOR_H
18
19#include "qgis_core.h"
20#include "qgis_sip.h"
21#include "qgsfeatureiterator.h"
22#include "qgsvectorlayer.h"
23
24#ifndef ARROW_C_DATA_INTERFACE
25#define ARROW_C_DATA_INTERFACE
26
27#define ARROW_FLAG_DICTIONARY_ORDERED 1
28#define ARROW_FLAG_NULLABLE 2
29#define ARROW_FLAG_MAP_KEYS_SORTED 4
30
31#ifndef SIP_RUN
// Plain C struct describing an Arrow schema. It is only declared when
// ARROW_C_DATA_INTERFACE has not already been defined by another header, and
// it is hidden from SIP by the surrounding SIP_RUN guard.
// NOTE(review): member order and types presumably mirror the Arrow C data
// interface definition and must not be changed -- confirm against the spec.
33struct ArrowSchema
34{
35 // Array type description
36 const char *format;
37 const char *name;
38 const char *metadata;
 // presumably a combination of the ARROW_FLAG_* values defined above -- TODO confirm
39 int64_t flags;
40 int64_t n_children;
41 struct ArrowSchema **children;
42 struct ArrowSchema *dictionary;
43
44 // Release callback
 // The SIP capsule code in this header only invokes it when it is non-null.
45 void ( *release )( struct ArrowSchema * );
46 // Opaque producer-specific data
47 void *private_data;
48};
50#endif
51
52#ifndef SIP_RUN
// Plain C struct describing one Arrow array. It is only declared when
// ARROW_C_DATA_INTERFACE has not already been defined by another header, and
// it is hidden from SIP by the surrounding SIP_RUN guard.
// NOTE(review): member order and types presumably mirror the Arrow C data
// interface definition and must not be changed -- confirm against the spec.
54struct ArrowArray
55{
56 // Array data description
57 int64_t length;
58 int64_t null_count;
59 int64_t offset;
60 int64_t n_buffers;
61 int64_t n_children;
62 const void **buffers;
63 struct ArrowArray **children;
64 struct ArrowArray *dictionary;
65
66 // Release callback
67 void ( *release )( struct ArrowArray * );
68 // Opaque producer-specific data
69 void *private_data;
70};
72#endif
73
74#endif // ARROW_C_DATA_INTERFACE
75
76#ifndef ARROW_C_STREAM_INTERFACE
77#define ARROW_C_STREAM_INTERFACE
78
79#ifndef SIP_RUN
// Plain C struct describing a stream of Arrow arrays. It is only declared when
// ARROW_C_STREAM_INTERFACE has not already been defined by another header, and
// it is hidden from SIP by the surrounding SIP_RUN guard.
// NOTE(review): member order and types presumably mirror the Arrow C stream
// interface definition and must not be changed -- confirm against the spec.
81struct ArrowArrayStream
82{
83 // Callbacks providing stream functionality
 // get_schema and get_next write their result through the `out` pointer and
 // return an int; presumably 0 means success -- TODO confirm.
84 int ( *get_schema )( struct ArrowArrayStream *, struct ArrowSchema *out );
85 int ( *get_next )( struct ArrowArrayStream *, struct ArrowArray *out );
86 const char *( *get_last_error )( struct ArrowArrayStream * );
87
88 // Release callback
 // The SIP capsule code in this header only invokes it when it is non-null.
89 void ( *release )( struct ArrowArrayStream * );
90
91 // Opaque producer-specific data
92 void *private_data;
93};
95#endif
96
97#endif // ARROW_C_STREAM_INTERFACE
98
105{
106 public:
109
116 void setGeometryColumnName( const QString &geometryColumnName );
117
124 QString geometryColumnName() const;
125
126 private:
127 QString mGeometryColumnName;
128};
129
// Wrapper around an ArrowSchema. Holds the C struct by value (mSchema) together
// with the index of the column that should receive the feature geometry.
// NOTE(review): this listing has gaps in its numbering; the default constructor
// used by fromArrow() below (`new QgsArrowSchema()`) is not visible here and is
// presumably declared on one of the missing lines.
144class CORE_EXPORT QgsArrowSchema
145{
146 public:
149
 // Copy constructor.
151 QgsArrowSchema( const QgsArrowSchema &other );
152
 // Assignment operator.
154 QgsArrowSchema &operator=( const QgsArrowSchema &other );
155
157
158#ifndef SIP_RUN
 // Access the underlying ArrowSchema from C++ (not exposed to SIP).
160 struct ArrowSchema *schema();
161
 // Const access to the underlying ArrowSchema from C++ (not exposed to SIP).
163 const struct ArrowSchema *schema() const;
164#endif
165
 // Returns the address of the underlying ArrowSchema as an integer, for
 // import or export across boundaries.
171 unsigned long long cSchemaAddress() const;
172
 // Exports this schema to the address of an empty ArrowSchema.
178 void exportToAddress( unsigned long long otherAddress );
179
 // Returns true if this wrapper object holds a valid ArrowSchema.
181 bool isValid() const;
182
 // Returns the index of the column in this schema that should be populated
 // with a feature geometry (mGeometryColumnIndex defaults to -1).
188 int geometryColumnIndex() const;
189
192
193 // clang-format off
194#ifdef SIP_RUN
205 SIP_PYOBJECT __arrow_c_schema__();
206 % MethodCode
 // Hands the wrapped schema to Python as a PyCapsule named "arrow_schema".
 // The struct is bitwise-copied into a malloc'ed ArrowSchema and the
 // wrapper's own release pointer is then cleared, so ownership moves to the
 // capsule rather than being duplicated.
 // NOTE(review): because the wrapper's release is nulled, a later call would
 // copy a struct whose release is already nullptr -- confirm that exporting
 // only once per object is the intended contract.
207 struct ArrowSchema *exportedSchema = static_cast<struct ArrowSchema *>( malloc( sizeof( struct ArrowSchema ) ) );
208 if ( !exportedSchema )
209 {
210 PyErr_SetString( PyExc_MemoryError, "Failed to allocate ArrowSchema" );
211 sipIsErr = 1;
212 }
213 else
214 {
215 memcpy( exportedSchema, sipCpp->schema(), sizeof( struct ArrowSchema ) );
216 sipCpp->schema()->release = nullptr;
 // Capsule destructor: release the schema if a consumer has not already
 // cleared the callback, then free the heap copy allocated above.
217 sipRes = PyCapsule_New( exportedSchema, "arrow_schema", []( PyObject *capsule )
218 {
219 struct ArrowSchema *schema = static_cast<struct ArrowSchema *>( PyCapsule_GetPointer( capsule, "arrow_schema" ) );
220 if ( schema && schema->release )
221 {
222 schema->release( schema );
223 }
224 free( schema );
225 } );
 // Capsule creation failed: the destructor will never run, so clean up here.
226 if ( !sipRes )
227 {
228 if ( exportedSchema->release )
229 {
230 exportedSchema->release( exportedSchema );
231 }
232 free( exportedSchema );
233 sipIsErr = 1;
234 }
235 }
236 % End
237
247 static SIP_PYOBJECT fromArrow( SIP_PYOBJECT obj ) SIP_TYPEHINT( QgsArrowSchema );
248 % MethodCode
 // Builds a new QgsArrowSchema from either an "arrow_schema" PyCapsule or an
 // object that has an __arrow_c_schema__ attribute (called with no arguments).
 // In both cases the capsule's struct is bitwise-copied into the new wrapper
 // and the capsule's release pointer is cleared, so the capsule destructor
 // will not release what the wrapper now holds.
 // Any other input raises TypeError.
 // NOTE(review): the result of sipConvertFromNewType is not checked in either
 // branch -- confirm whether a null result needs explicit handling.
249 if ( PyCapsule_CheckExact( a0 ) && PyCapsule_IsValid( a0, "arrow_schema" ) ) {
250 struct ArrowSchema *capsuleSchema = static_cast<struct ArrowSchema *>( PyCapsule_GetPointer( a0, "arrow_schema" ) );
251 QgsArrowSchema *newSchema = new QgsArrowSchema();
252 memcpy(newSchema->schema(), capsuleSchema, sizeof(struct ArrowSchema));
253 capsuleSchema->release = nullptr;
254 sipRes = sipConvertFromNewType( newSchema, sipType_QgsArrowSchema, nullptr );
255 }
256 else if ( PyObject_HasAttrString( a0, "__arrow_c_schema__" ) )
257 {
258 PyObject *method = PyObject_GetAttrString( a0, "__arrow_c_schema__" );
259 if ( method )
260 {
261 PyObject *capsule = PyObject_CallObject( method, nullptr );
262 Py_DECREF( method );
263 if ( capsule )
264 {
265 if ( PyCapsule_CheckExact( capsule ) && PyCapsule_IsValid( capsule, "arrow_schema" ) )
266 {
267 struct ArrowSchema *capsuleSchema = static_cast<struct ArrowSchema *>( PyCapsule_GetPointer( capsule, "arrow_schema" ) );
268 QgsArrowSchema *newSchema = new QgsArrowSchema();
269 memcpy(newSchema->schema(), capsuleSchema, sizeof(struct ArrowSchema));
270 capsuleSchema->release = nullptr;
271 sipRes = sipConvertFromNewType( newSchema, sipType_QgsArrowSchema, nullptr );
272 }
273 else
274 {
275 PyErr_SetString( PyExc_TypeError, "__arrow_c_schema__() did not return a valid arrow_schema PyCapsule" );
276 sipIsErr = 1;
277 }
 // The temporary capsule is dropped on both the success and the error path.
278 Py_DECREF( capsule );
279 }
280 else
281 {
282 sipIsErr = 1; // Exception already set by PyObject_CallObject
283 }
284 }
285 else
286 {
287 sipIsErr = 1; // Exception already set
288 }
289 }
290 else
291 {
292 PyErr_Format( PyExc_TypeError, "Expected an object implementing __arrow_c_schema__(), got %s", Py_TYPE( a0 )->tp_name );
293 sipIsErr = 1;
294 }
295 % End
296#endif
297
298 private:
 // Wrapped C struct, value-initialized so release starts out as nullptr.
299 struct ArrowSchema mSchema {};
 // Index of the geometry column; -1 until one has been set.
300 int mGeometryColumnIndex = -1;
301
302 // clang-format on
303};
304
// Wrapper around an ArrowArray. Holds the C struct by value (mArray); copying
// is disabled and a move constructor is provided for C++ callers.
// NOTE(review): this listing has gaps in its numbering, so some declarations
// (for example a destructor or move assignment) may be missing from view.
316class CORE_EXPORT QgsArrowArray
317{
318 public:
 // Construct invalid array holder.
320 QgsArrowArray() = default;
321
 // Copying is not allowed.
323 QgsArrowArray( const QgsArrowArray &other ) = delete;
324
325#ifndef SIP_RUN
 // Move constructor (C++ only, hidden from SIP).
327 QgsArrowArray( QgsArrowArray &&other );
328
331#endif
332
334
335#ifndef SIP_RUN
 // Access the underlying ArrowArray from C++ (not exposed to SIP).
337 struct ArrowArray *array();
338
 // Const access to the underlying ArrowArray from C++ (not exposed to SIP).
340 const struct ArrowArray *array() const;
341#endif
342
 // Returns the address of the underlying ArrowArray as an integer, for
 // import or export across boundaries.
348 unsigned long long cArrayAddress() const;
349
 // Exports this array to the address of an empty ArrowArray.
355 void exportToAddress( unsigned long long otherAddress );
356
 // Returns true if this wrapper object holds a valid ArrowArray.
358 bool isValid() const;
359
360 private:
 // Wrapped C struct, value-initialized so release starts out as nullptr.
361 struct ArrowArray mArray {};
362
363#ifdef SIP_RUN
 // Seen only by SIP: a private copy constructor declaration, presumably so the
 // generated bindings treat the type as non-copyable -- TODO confirm.
364 QgsArrowArray( const QgsArrowArray &other );
365#endif
366};
367
// Wrapper around an ArrowArrayStream. Holds the C struct by value
// (mArrayStream); copying is disabled.
// NOTE(review): this listing has gaps in its numbering; the default constructor
// used by fromArrow() below (`new QgsArrowArrayStream()`) and the contents of
// the first SIP_RUN guard are not visible here.
379class CORE_EXPORT QgsArrowArrayStream
380{
381 public:
384
 // Copying is not allowed.
386 QgsArrowArrayStream( const QgsArrowArrayStream &other ) = delete;
387
388#ifndef SIP_RUN
391
394#endif
395
397
398#ifndef SIP_RUN
 // Access the underlying ArrowArrayStream from C++ (not exposed to SIP).
400 struct ArrowArrayStream *arrayStream();
401#endif
402
 // Returns the address of the underlying ArrowArrayStream as an integer, for
 // import or export across boundaries.
408 unsigned long long cArrayStreamAddress() const;
409
 // Exports this array stream to the address of an empty ArrowArrayStream.
415 void exportToAddress( unsigned long long otherAddress );
416
 // Returns true if this wrapper object holds a valid ArrowArrayStream.
418 bool isValid() const;
419
420 // clang-format off
421#ifdef SIP_RUN
433 SIP_PYOBJECT __arrow_c_stream__( SIP_PYOBJECT requested_schema = Py_None );
434 % MethodCode
 // Hands the wrapped stream to Python as a PyCapsule named
 // "arrow_array_stream". The struct is bitwise-copied into a malloc'ed
 // ArrowArrayStream and the wrapper's own release pointer is then cleared, so
 // ownership moves to the capsule rather than being duplicated.
 // NOTE(review): because the wrapper's release is nulled, a later call would
 // copy a struct whose release is already nullptr -- confirm that exporting
 // only once per object is the intended contract.
435 Q_UNUSED( a0 ); // requested_schema is not used but required by the protocol signature
436 struct ArrowArrayStream *exportedStream = static_cast<struct ArrowArrayStream *>( malloc( sizeof( struct ArrowArrayStream ) ) );
437 if ( !exportedStream )
438 {
439 PyErr_SetString( PyExc_MemoryError, "Failed to allocate ArrowArrayStream" );
440 sipIsErr = 1;
441 }
442 else
443 {
444 memcpy( exportedStream, sipCpp->arrayStream(), sizeof( struct ArrowArrayStream ) );
445 sipCpp->arrayStream()->release = nullptr;
 // Capsule destructor: release the stream if a consumer has not already
 // cleared the callback, then free the heap copy allocated above.
446 sipRes = PyCapsule_New( exportedStream, "arrow_array_stream", []( PyObject *capsule )
447 {
448 struct ArrowArrayStream *stream = static_cast<struct ArrowArrayStream *>( PyCapsule_GetPointer( capsule, "arrow_array_stream" ) );
449 if ( stream && stream->release )
450 {
451 stream->release( stream );
452 }
453 free( stream );
454 } );
 // Capsule creation failed: the destructor will never run, so clean up here.
455 if ( !sipRes )
456 {
457 if ( exportedStream->release )
458 {
459 exportedStream->release( exportedStream );
460 }
461 free( exportedStream );
462 sipIsErr = 1;
463 }
464 }
465 % End
466
476 static SIP_PYOBJECT fromArrow( SIP_PYOBJECT obj ) SIP_TYPEHINT( QgsArrowArrayStream );
477 % MethodCode
 // Builds a new QgsArrowArrayStream from either an "arrow_array_stream"
 // PyCapsule or an object that has an __arrow_c_stream__ attribute (called
 // with no arguments). In both cases the capsule's struct is bitwise-copied
 // into the new wrapper and the capsule's release pointer is cleared, so the
 // capsule destructor will not release what the wrapper now holds.
 // Any other input raises TypeError.
 // NOTE(review): the result of sipConvertFromNewType is not checked in either
 // branch -- confirm whether a null result needs explicit handling.
478 if ( PyCapsule_CheckExact( a0 ) && PyCapsule_IsValid( a0, "arrow_array_stream" ) )
479 {
480 struct ArrowArrayStream *capsuleStream = static_cast<struct ArrowArrayStream *>( PyCapsule_GetPointer( a0, "arrow_array_stream" ) );
481 QgsArrowArrayStream *newStream = new QgsArrowArrayStream();
482 memcpy(newStream->arrayStream(), capsuleStream, sizeof(struct ArrowArrayStream));
483 capsuleStream->release = nullptr;
484 sipRes = sipConvertFromNewType( newStream, sipType_QgsArrowArrayStream, nullptr );
485 }
486 else if ( PyObject_HasAttrString( a0, "__arrow_c_stream__" ) )
487 {
488 PyObject *method = PyObject_GetAttrString( a0, "__arrow_c_stream__" );
489 if ( method )
490 {
491 PyObject *capsule = PyObject_CallObject( method, nullptr );
492 Py_DECREF( method );
493 if ( capsule )
494 {
495 if ( PyCapsule_CheckExact( capsule ) && PyCapsule_IsValid( capsule, "arrow_array_stream" ) )
496 {
497 struct ArrowArrayStream *capsuleStream = static_cast<struct ArrowArrayStream *>( PyCapsule_GetPointer( capsule, "arrow_array_stream" ) );
498 QgsArrowArrayStream *newStream = new QgsArrowArrayStream();
499 memcpy(newStream->arrayStream(), capsuleStream, sizeof(struct ArrowArrayStream));
500 capsuleStream->release = nullptr;
501 sipRes = sipConvertFromNewType( newStream, sipType_QgsArrowArrayStream, nullptr );
502 }
503 else
504 {
505 PyErr_SetString( PyExc_TypeError, "__arrow_c_stream__() did not return a valid arrow_array_stream PyCapsule" );
506 sipIsErr = 1;
507 }
 // The temporary capsule is dropped on both the success and the error path.
508 Py_DECREF( capsule );
509 }
510 else
511 {
512 sipIsErr = 1; // Exception already set by PyObject_CallObject
513 }
514 }
515 else
516 {
517 sipIsErr = 1; // Exception already set
518 }
519 }
520 else
521 {
522 PyErr_Format( PyExc_TypeError, "Expected an object implementing __arrow_c_stream__(), got %s", Py_TYPE( a0 )->tp_name );
523 sipIsErr = 1;
524 }
525 % End
526#endif
527
528 private:
 // Wrapped C struct, value-initialized so release starts out as nullptr.
529 struct ArrowArrayStream mArrayStream {};
530
531#ifdef SIP_RUN
 // Seen only by SIP: a private copy constructor declaration, presumably so the
 // generated bindings treat the type as non-copyable -- TODO confirm.
532 QgsArrowArrayStream( const QgsArrowArrayStream &other );
533#endif
534
535 // clang-format on
536};
537
// Pairs a QgsFeatureIterator with the QgsArrowSchema used for its Arrow output.
// NOTE(review): this listing has gaps in its numbering; nextFeatures() and the
// inferSchema() overloads are presumably declared on the missing lines.
543class CORE_EXPORT QgsArrowIterator
544{
545 public:
 // Construct invalid iterator.
547 QgsArrowIterator() = default;
548
 // Construct an iterator over the given feature iterator, which is taken by value.
550 explicit QgsArrowIterator( QgsFeatureIterator featureIterator );
551
552#ifndef SIP_RUN
 // Access the output ArrowSchema from C++ (not exposed to SIP).
554 struct ArrowSchema *schema();
555#endif
556
 // Set the ArrowSchema for the output of all future batches.
562 void setSchema( const QgsArrowSchema &schema );
563
 // Export this iterator as an ArrowArrayStream; batchSize defaults to 65536.
565 QgsArrowArrayStream toArrayStream( int batchSize = 65536 ) const;
566
576
583
 // NOTE(review): the next line is only the parameter list of a declaration
 // whose opening and closing lines are missing from this listing; it is
 // presumably an inferSchema() overload taking QgsFields -- TODO confirm.
590 const QgsFields &fields, bool hasGeometry = false, const QgsCoordinateReferenceSystem &crs = QgsCoordinateReferenceSystem(), const QgsArrowInferSchemaOptions &options = QgsArrowInferSchemaOptions()
592
593 private:
 // Source of the features converted to Arrow.
594 QgsFeatureIterator mFeatureIterator;
 // Schema applied to the output (see setSchema()).
595 QgsArrowSchema mSchema;
596};
597
598#endif // QGSARROWITERATOR_H
Wrapper around an ArrowArrayStream.
QgsArrowArrayStream & operator=(QgsArrowArrayStream &other)=delete
struct ArrowArrayStream * arrayStream()
Access the underlying ArrowArrayStream from C++.
unsigned long long cArrayStreamAddress() const
Returns the address of the underlying ArrowArrayStream for import or export across boundaries.
QgsArrowArrayStream()=default
Construct invalid array stream holder.
void exportToAddress(unsigned long long otherAddress)
Export this array stream to the address of an empty ArrowArrayStream for export across boundaries.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArrayStream.
QgsArrowArrayStream(const QgsArrowArrayStream &other)=delete
Wrapper around an ArrowArray.
QgsArrowArray()=default
Construct invalid array holder.
struct ArrowArray * array()
Access the underlying ArrowArray from C++.
QgsArrowArray & operator=(QgsArrowArray &other)=delete
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArray.
QgsArrowArray(const QgsArrowArray &other)=delete
void exportToAddress(unsigned long long otherAddress)
Export this array to the address of an empty ArrowArray for export across boundaries.
unsigned long long cArrayAddress() const
Returns the address of the underlying ArrowArray for import or export across boundaries.
Options for inferring an ArrowSchema from a feature source.
void setGeometryColumnName(const QString &geometryColumnName)
Set the name that should be used to refer to the geometry column.
QgsArrowInferSchemaOptions()
Construct default options.
QString geometryColumnName() const
The name that should be used for a layer's geometry column.
static QgsArrowSchema inferSchema(const QgsVectorLayer &layer, const QgsArrowInferSchemaOptions &options=QgsArrowInferSchemaOptions())
Infer the QgsArrowSchema for a given QgsVectorLayer.
struct ArrowSchema * schema()
Access the output ArrowSchema from C++.
QgsArrowArrayStream toArrayStream(int batchSize=65536) const
Export this iterator as an ArrowArrayStream.
QgsArrowArray nextFeatures(int n)
Build an ArrowArray using the next n features (or fewer, depending on the number of features remaining).
QgsArrowIterator()=default
Construct invalid iterator.
void setSchema(const QgsArrowSchema &schema)
Set the ArrowSchema for the output of all future batches.
Wrapper around an ArrowSchema.
int geometryColumnIndex() const
Returns the index of the column in this schema that should be populated with a feature geometry.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowSchema.
unsigned long long cSchemaAddress() const
Returns the address of the underlying ArrowSchema for import or export across boundaries.
struct ArrowSchema * schema()
Access the underlying ArrowSchema from C++.
QgsArrowSchema & operator=(const QgsArrowSchema &other)
Assignment operator.
void exportToAddress(unsigned long long otherAddress)
Export this schema to the address of an empty ArrowSchema for export across boundaries.
QgsArrowSchema()
Construct invalid schema holder.
void setGeometryColumnIndex(int geometryColumnIndex)
Set the index of the column in this schema that should be populated with a feature geometry.
Represents a coordinate reference system (CRS).
Defines a QGIS exception class.
Wrapper for iterator of features from vector data provider or vector layer.
Container of fields for a vector layer.
Definition qgsfields.h:46
Represents a vector layer which manages a vector based dataset.
#define SIP_TYPEHINT(type)
Definition qgis_sip.h:239
#define SIP_THROW(name,...)
Definition qgis_sip.h:210