QGIS API Documentation 3.99.0-Master (d270888f95f)
Loading...
Searching...
No Matches
qgsarrowiterator.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsarrowiterator.cpp
3 ---------------------
4 begin : November 2025
5 copyright : (C) 2025 by Dewey Dunnington
6 email : dewey at dunnington dot ca
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#include "qgsarrowiterator.h"
17
18#include <nlohmann/json.hpp>
19
20#include "nanoarrow/nanoarrow.hpp"
21#include "qgsfeatureiterator.h"
22#include "qgsvectorlayer.h"
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
// Evaluates expr once. When the resulting status differs from NANOARROW_OK, throws a
// QgsException whose text holds the status code followed by the message read from the
// ArrowError pointed to by err.
#define QGIS_NANOARROW_THROW_NOT_OK_ERR( expr, err ) \
  do \
  { \
    if ( const int nanoarrowStatus = ( expr ); nanoarrowStatus != NANOARROW_OK ) \
    { \
      throw QgsException( u"nanoarrow error (%1): %2"_s.arg( nanoarrowStatus ).arg( QString::fromUtf8( ( err )->message ) ) ); \
    } \
  } while ( 0 )
37
// Evaluates expr once. When the resulting status differs from NANOARROW_OK, throws a
// QgsException whose text holds the status code.
#define QGIS_NANOARROW_THROW_NOT_OK( expr ) \
  do \
  { \
    if ( const int nanoarrowStatus = ( expr ); nanoarrowStatus != NANOARROW_OK ) \
    { \
      throw QgsException( u"nanoarrow error (%1)"_s.arg( nanoarrowStatus ) ); \
    } \
  } while ( 0 )
47
48
51
53{
54 mGeometryColumnName = geometryColumnName;
55}
56
58{
59 return mGeometryColumnName;
60}
61
64
66{
67 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &other.mSchema, &mSchema ) );
68 mGeometryColumnIndex = other.mGeometryColumnIndex;
69}
70
72{
73 if ( mSchema.release )
74 {
75 ArrowSchemaRelease( &mSchema );
76 }
77 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &other.mSchema, &mSchema ) );
78 mGeometryColumnIndex = other.mGeometryColumnIndex;
79 return *this;
80}
81
83{
84 if ( mSchema.release )
85 {
86 ArrowSchemaRelease( &mSchema );
87 }
88}
89
90struct ArrowSchema *QgsArrowSchema::schema()
91{
92 return &mSchema;
93}
94
95const struct ArrowSchema *QgsArrowSchema::schema() const
96{
97 return &mSchema;
98}
99
100unsigned long long QgsArrowSchema::cSchemaAddress() const
101{
102 // In the event QGIS is built on platform where unsigned long long is insufficient to
103 // represent a uintptr_t, ensure compilation fails
104 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
105
106 return reinterpret_cast<unsigned long long>( &mSchema );
107}
108
109void QgsArrowSchema::exportToAddress( unsigned long long otherAddress )
110{
111 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
112
113 struct ArrowSchema *otherArrowSchema = reinterpret_cast<struct ArrowSchema *>( otherAddress );
114 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &mSchema, otherArrowSchema ) );
115}
116
118{
119 return mSchema.release;
120}
121
122int QgsArrowSchema::geometryColumnIndex() const { return mGeometryColumnIndex; }
123
125
127{
128 if ( mArray.release )
129 {
130 ArrowArrayRelease( &mArray );
131 }
132
133 ArrowArrayMove( other.array(), &mArray );
134}
135
137{
138 if ( this != &other )
139 {
140 ArrowArrayMove( other.array(), &mArray );
141 }
142
143 return *this;
144}
145
147{
148 if ( mArray.release )
149 {
150 ArrowArrayRelease( &mArray );
151 }
152}
153
154struct ArrowArray *QgsArrowArray::array()
155{
156 return &mArray;
157}
158
159const struct ArrowArray *QgsArrowArray::array() const
160{
161 return &mArray;
162}
163
164unsigned long long QgsArrowArray::cArrayAddress() const
165{
166 // In the event QGIS is built on platform where unsigned long long is insufficient to
167 // represent a uintptr_t, ensure compilation fails
168 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
169
170 return reinterpret_cast<unsigned long long>( &mArray );
171}
172
173void QgsArrowArray::exportToAddress( unsigned long long otherAddress )
174{
175 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
176
177 struct ArrowArray *otherArrowArray = reinterpret_cast<struct ArrowArray *>( otherAddress );
178 ArrowArrayMove( &mArray, otherArrowArray );
179}
180
182{
183 return mArray.release;
184}
185
187{
188 if ( mArrayStream.release )
189 {
190 ArrowArrayStreamRelease( &mArrayStream );
191 }
192
193 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
194}
195
197{
198 if ( this != &other )
199 {
200 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
201 }
202
203 return *this;
204}
205
207{
208 if ( mArrayStream.release )
209 {
210 ArrowArrayStreamRelease( &mArrayStream );
211 }
212}
213
214struct ArrowArrayStream *QgsArrowArrayStream::arrayStream()
215{
216 return &mArrayStream;
217}
218
220{
221 // In the event QGIS is built on platform where unsigned long long is insufficient to
222 // represent a uintptr_t, ensure compilation fails
223 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
224
225 return reinterpret_cast<unsigned long long>( &mArrayStream );
226}
227
228void QgsArrowArrayStream::exportToAddress( unsigned long long otherAddress )
229{
230 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
231
232 struct ArrowArrayStream *otherArrowArrayStream = reinterpret_cast<struct ArrowArrayStream *>( otherAddress );
233 ArrowArrayStreamMove( &mArrayStream, otherArrowArrayStream );
234}
235
237{
238 return mArrayStream.release;
239}
240
241namespace
242{
243
244
245 void inferGeometry( struct ArrowSchema *col, const QString &name, const QgsCoordinateReferenceSystem &crs )
246 {
247 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetName( col, name.toUtf8().constData() ) );
248 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BINARY ) );
249
250 std::string crsString = crs.toJsonString();
251 std::string geoArrowMetadata;
252 if ( crsString.empty() )
253 {
254 geoArrowMetadata = "{}";
255 }
256 else
257 {
258 geoArrowMetadata = R"({"crs":)" + crsString + R"(})";
259 }
260
261 nanoarrow::UniqueBuffer metadataKv;
262 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderInit( metadataKv.get(), nullptr ) );
263 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView( "ARROW:extension:name" ), ArrowCharView( "geoarrow.wkb" ) ) );
264 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView( "ARROW:extension:metadata" ), ArrowCharView( geoArrowMetadata.c_str() ) ) );
265 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetMetadata( col, reinterpret_cast<char *>( metadataKv->data ) ) );
266 }
267
268 void appendGeometry( const QgsFeature &feature, struct ArrowArray *col )
269 {
270 if ( !feature.hasGeometry() )
271 {
272 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( col, 1 ) );
273 return;
274 }
275
276 const QByteArray wkb = feature.geometry().asWkb( QgsAbstractGeometry::FlagExportTrianglesAsPolygons );
277 struct ArrowBufferView v;
278 v.data.data = wkb.data();
279 v.size_bytes = static_cast<int64_t>( wkb.size() );
280 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, v ) );
281 }
282
283 void inferMetaType( const QMetaType::Type metaType, struct ArrowSchema *col, const QString &fieldName )
284 {
285 switch ( metaType )
286 {
287 case QMetaType::Bool:
288 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BOOL ) );
289 return;
290 case QMetaType::QChar:
291 case QMetaType::SChar:
292 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT8 ) );
293 return;
294 case QMetaType::UChar:
295 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT8 ) );
296 return;
297 case QMetaType::Short:
298 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT16 ) );
299 return;
300 case QMetaType::UShort:
301 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT16 ) );
302 return;
303 case QMetaType::Int:
304 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT32 ) );
305 return;
306 case QMetaType::UInt:
307 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT32 ) );
308 return;
309 case QMetaType::Long:
310 case QMetaType::LongLong:
311 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT64 ) );
312 return;
313 case QMetaType::ULong:
314 case QMetaType::ULongLong:
315 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT64 ) );
316 return;
317 case QMetaType::Float:
318 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_FLOAT ) );
319 return;
320 case QMetaType::Double:
321 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_DOUBLE ) );
322 return;
323 case QMetaType::QString:
324 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_STRING ) );
325 return;
326 case QMetaType::QByteArray:
327 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BINARY ) );
328 return;
329 case QMetaType::QDate:
330 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_DATE32 ) );
331 return;
332 case QMetaType::QTime:
333 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetTypeDateTime( col, NANOARROW_TYPE_TIME32, NANOARROW_TIME_UNIT_MILLI, nullptr ) );
334 return;
335 case QMetaType::QDateTime:
336 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetTypeDateTime( col, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MILLI, "UTC" ) );
337 return;
338 case QMetaType::QStringList:
339 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_LIST ) );
340 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col->children[0], NANOARROW_TYPE_STRING ) );
341 return;
342 default:
343 throw QgsException( u"QgsArrowIterator can't infer field type '%1' for field '%2'"_s.arg( QMetaType::typeName( metaType ) ).arg( fieldName ) );
344 }
345 }
346
347 void inferField( const QgsField &field, struct ArrowSchema *col )
348 {
349 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetName( col, field.name().toUtf8().constData() ) );
350 switch ( field.type() )
351 {
352 case QMetaType::QVariantList:
353 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_LIST ) );
354 inferMetaType( field.subType(), col->children[0], field.name() );
355 break;
356 default:
357 inferMetaType( field.type(), col, field.name() );
358 break;
359 }
360 }
361
362 void appendVariant( const QVariant &v, struct ArrowArray *col, struct ArrowSchemaView &columnTypeView, struct ArrowSchemaView &columnListTypeView )
363 {
364 if ( QgsVariantUtils::isNull( v ) )
365 {
366 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( col, 1 ) );
367 return;
368 }
369
370 switch ( columnTypeView.type )
371 {
372 case NANOARROW_TYPE_BOOL:
373 if ( v.canConvert( QMetaType::Bool ) )
374 {
375 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, v.toBool() ) );
376 return;
377 }
378 break;
379 case NANOARROW_TYPE_UINT8:
380 case NANOARROW_TYPE_UINT16:
381 case NANOARROW_TYPE_UINT32:
382 case NANOARROW_TYPE_UINT64:
383 if ( v.canConvert( QMetaType::ULongLong ) )
384 {
385 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendUInt( col, v.toULongLong() ) );
386 return;
387 }
388 break;
389 case NANOARROW_TYPE_INT8:
390 case NANOARROW_TYPE_INT16:
391 case NANOARROW_TYPE_INT32:
392 case NANOARROW_TYPE_INT64:
393 if ( v.canConvert( QMetaType::LongLong ) )
394 {
395 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, v.toLongLong() ) );
396 return;
397 }
398 break;
399 case NANOARROW_TYPE_HALF_FLOAT:
400 case NANOARROW_TYPE_FLOAT:
401 case NANOARROW_TYPE_DOUBLE:
402 if ( v.canConvert( QMetaType::Double ) )
403 {
404 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendDouble( col, v.toDouble() ) );
405 return;
406 }
407 break;
408 case NANOARROW_TYPE_STRING:
409 case NANOARROW_TYPE_LARGE_STRING:
410 case NANOARROW_TYPE_STRING_VIEW:
411 {
412 if ( v.canConvert( QMetaType::QString ) )
413 {
414 const QByteArray string = v.toString().toUtf8();
415 struct ArrowBufferView bytesView;
416 bytesView.data.data = string.constData();
417 bytesView.size_bytes = static_cast<int64_t>( string.size() );
418 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, bytesView ) );
419 return;
420 }
421 break;
422 }
423
424 case NANOARROW_TYPE_BINARY:
425 case NANOARROW_TYPE_LARGE_BINARY:
426 case NANOARROW_TYPE_BINARY_VIEW:
427 case NANOARROW_TYPE_FIXED_SIZE_BINARY:
428 {
429 if ( v.canConvert( QMetaType::QByteArray ) )
430 {
431 const QByteArray bytes = v.toByteArray();
432 struct ArrowBufferView bytesView;
433 bytesView.data.data = bytes.data();
434 bytesView.size_bytes = static_cast<int64_t>( bytes.size() );
435 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, bytesView ) );
436 return;
437 }
438 break;
439 }
440
441 case NANOARROW_TYPE_DATE32:
442 {
443 if ( v.canConvert( QMetaType::QDate ) )
444 {
445 static QDate epoch = QDate( 1970, 1, 1 );
446 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
447 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, daysSinceEpoch ) );
448 return;
449 }
450 break;
451 }
452
453 case NANOARROW_TYPE_DATE64:
454 {
455 if ( v.canConvert( QMetaType::QDate ) )
456 {
457 static QDate epoch = QDate( 1970, 1, 1 );
458 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
459 int64_t msSinceEpoch = daysSinceEpoch * 24 * 60 * 60 * 1000;
460 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, msSinceEpoch ) );
461 return;
462 }
463 break;
464 }
465
466 case NANOARROW_TYPE_TIMESTAMP:
467 {
468 if ( v.canConvert( QMetaType::QDateTime ) )
469 {
470 const QDateTime dateTime = v.toDateTime().toUTC();
471 switch ( columnTypeView.time_unit )
472 {
473 case NANOARROW_TIME_UNIT_SECOND:
474 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toSecsSinceEpoch() ) );
475 return;
476 case NANOARROW_TIME_UNIT_MILLI:
477 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() ) );
478 return;
479 case NANOARROW_TIME_UNIT_MICRO:
480 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() * 1000 ) );
481 return;
482 case NANOARROW_TIME_UNIT_NANO:
483 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() * 1000 * 1000 ) );
484 return;
485 }
486 }
487
488 break;
489 }
490 case NANOARROW_TYPE_TIME32:
491 case NANOARROW_TYPE_TIME64:
492 {
493 if ( v.canConvert( QMetaType::QTime ) )
494 {
495 const QTime time = v.toTime();
496 switch ( columnTypeView.time_unit )
497 {
498 case NANOARROW_TIME_UNIT_SECOND:
499 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, time.msecsSinceStartOfDay() / 1000 ) );
500 return;
501 case NANOARROW_TIME_UNIT_MILLI:
502 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, time.msecsSinceStartOfDay() ) );
503 return;
504 case NANOARROW_TIME_UNIT_MICRO:
505 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, static_cast<int64_t>( time.msecsSinceStartOfDay() ) * 1000 ) );
506 return;
507 case NANOARROW_TIME_UNIT_NANO:
508 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, static_cast<int64_t>( time.msecsSinceStartOfDay() ) * 1000 * 1000 ) );
509 return;
510 }
511 }
512
513 break;
514 }
515
516 case NANOARROW_TYPE_LIST:
517 case NANOARROW_TYPE_FIXED_SIZE_LIST:
518 case NANOARROW_TYPE_LARGE_LIST:
519 case NANOARROW_TYPE_LIST_VIEW:
520 case NANOARROW_TYPE_LARGE_LIST_VIEW:
521 {
522 if ( v.canConvert( QMetaType::QVariantList ) )
523 {
524 const QVariantList variantList = v.toList();
525 struct ArrowSchemaView dummyListType {};
526 for ( const QVariant &item : variantList )
527 {
528 appendVariant( item, col->children[0], columnListTypeView, dummyListType );
529 }
530
531 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayFinishElement( col ) );
532 return;
533 }
534 break;
535 }
536 default:
537 break;
538 }
539
540 throw QgsException( u"Can't convert variant of type '%1' to Arrow type '%2'"_s.arg( v.typeName() ).arg( ArrowTypeString( columnTypeView.type ) ) );
541 }
542
543 class ArrowIteratorArrayStreamImpl
544 {
545 public:
546 ArrowIteratorArrayStreamImpl( QgsArrowIterator iterator, int batchSize )
547 : mIterator( iterator ), mBatchSize( batchSize ) {}
548
549 int GetSchema( struct ArrowSchema *schema )
550 {
551 NANOARROW_RETURN_NOT_OK( ArrowSchemaDeepCopy( mIterator.schema(), schema ) );
552 return NANOARROW_OK;
553 }
554
555 int GetNext( struct ArrowArray *array )
556 {
557 try
558 {
559 QgsArrowArray batch = mIterator.nextFeatures( mBatchSize );
560 ArrowArrayMove( batch.array(), array );
561 return NANOARROW_OK;
562 }
563 catch ( QgsException &e )
564 {
565 mLastError = e.what().toStdString();
566 return EINVAL;
567 }
568 catch ( std::exception &e )
569 {
570 mLastError = e.what();
571 return EINVAL;
572 }
573 catch ( ... )
574 {
575 mLastError = "unknown error";
576 return EINVAL;
577 }
578 }
579
580 const char *GetLastError() const { return mLastError.c_str(); }
581
582 private:
583 QgsArrowIterator mIterator;
584 int mBatchSize { 65536 };
585 std::string mLastError {};
586 };
587
588} //namespace
589
591 : mFeatureIterator( featureIterator )
592{
593}
594
595struct ArrowSchema *QgsArrowIterator::schema()
596{
597 return mSchema.schema();
598}
599
601{
602 if ( !schema.isValid() )
603 {
604 throw QgsException( u"Invalid or null ArrowSchema provided"_s );
605 }
606
607 mSchema = schema;
608}
609
611{
613 nanoarrow::ArrayStreamFactory<ArrowIteratorArrayStreamImpl>::InitArrayStream( new ArrowIteratorArrayStreamImpl( *this, batchSize ), out.arrayStream() );
614 return out;
615}
616
617
619{
620 if ( n < 1 )
621 {
622 throw QgsException( u"QgsArrowIterator can't iterate over less than one feature"_s );
623 }
624
625 if ( !mSchema.isValid() )
626 {
627 throw QgsException( u"QgsArrowIterator schema not set"_s );
628 }
629
630 // Check the schema and cache a few things about it before we loop over features.
631 // This could also be done when setting the schema (although the struct ArrowSchemaView
632 // would have to be opaque in the header if this were cached as a class member).
633 const struct ArrowSchema *schema = mSchema.schema();
634
635 struct ArrowError error {};
636
637 // Check that the top-level schema is a struct
638 struct ArrowSchemaView schemaView;
639 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &schemaView, schema, &error ), &error );
640 if ( schemaView.type != NANOARROW_TYPE_STRUCT )
641 {
642 throw QgsException( u"QgsArrowIterator expected requested schema as struct but got '%1'"_s.arg( ArrowTypeString( schemaView.type ) ) );
643 }
644
645 std::vector<QString> columnNames( schema->n_children );
646 std::vector<struct ArrowSchemaView> colTypeViews( schema->n_children );
647 std::vector<struct ArrowSchemaView> colListTypeViews( schema->n_children );
648 for ( int64_t i = 0; i < schema->n_children; i++ )
649 {
650 // Parse the column schema
651 columnNames[i] = QString( schema->children[i]->name != nullptr ? schema->children[i]->name : QString() );
652 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &colTypeViews[i], schema->children[i], &error ), &error );
653
654 // Parse the column list type if applicable
655 switch ( colTypeViews[i].type )
656 {
657 case NANOARROW_TYPE_LIST:
658 case NANOARROW_TYPE_FIXED_SIZE_LIST:
659 case NANOARROW_TYPE_LARGE_LIST:
660 case NANOARROW_TYPE_LIST_VIEW:
661 case NANOARROW_TYPE_LARGE_LIST_VIEW:
662 {
663 struct ArrowSchemaView childView;
664 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &childView, schema->children[i]->children[0], &error ), &error );
665 colListTypeViews[i] = std::move( childView );
666 break;
667 }
668 default:
669 colListTypeViews[i] = ArrowSchemaView {};
670 break;
671 }
672 }
673
674 // Create the output array
675 nanoarrow::UniqueArray tmp;
676 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowArrayInitFromSchema( tmp.get(), schema, &error ), &error );
677 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayReserve( tmp.get(), n ) );
678 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayStartAppending( tmp.get() ) );
679
680 // Loop features
681 std::vector<int> featureAttributeIndex;
682 QgsFeature feature;
683 while ( n > 0 && mFeatureIterator.nextFeature( feature ) )
684 {
685 --n;
686
687 // Cache the attribute index per output schema index on the first feature
688 if ( featureAttributeIndex.empty() )
689 {
690 for ( int64_t i = 0; i < schema->n_children; i++ )
691 {
692 featureAttributeIndex.push_back( feature.fieldNameIndex( columnNames[i] ) );
693 }
694 }
695
696 // Loop over the output schema fields and append the appropriate attribute from the
697 // feature (or geometry, or null if the feature does not contain that field).
698 for ( int64_t i = 0; i < schema->n_children; i++ )
699 {
700 int attributeIndex = featureAttributeIndex[i];
701 struct ArrowArray *columnArray = tmp->children[i];
702
703 if ( i == mSchema.geometryColumnIndex() )
704 {
705 appendGeometry( feature, columnArray );
706 }
707 else if ( attributeIndex >= 0 && attributeIndex < feature.attributeCount() )
708 {
709 appendVariant( feature.attribute( attributeIndex ), columnArray, colTypeViews[i], colListTypeViews[i] );
710 }
711 else
712 {
713 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( columnArray, 1 ) );
714 }
715 }
716
717 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayFinishElement( tmp.get() ) );
718 }
719
720 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowArrayFinishBuildingDefault( tmp.get(), &error ), &error );
721
722 QgsArrowArray out;
723 if ( tmp->length > 0 )
724 {
725 ArrowArrayMove( tmp.get(), out.array() );
726 }
727 return out;
728}
729
731{
732 bool layerHasGeometry = layer.isSpatial();
733 if ( layerHasGeometry && options.geometryColumnName().isEmpty() )
734 {
735 QgsArrowInferSchemaOptions optionsClone( options );
736 optionsClone.setGeometryColumnName( layer.dataProvider()->geometryColumnName() );
737 return inferSchema( layer.fields(), layerHasGeometry, layer.crs(), optionsClone );
738 }
739 else
740 {
741 return inferSchema( layer.fields(), layerHasGeometry, layer.crs(), options );
742 }
743}
744
745
747{
748 QgsArrowSchema out;
749 QgisPrivateArrowSchemaInit( out.schema() );
750 QGIS_NANOARROW_THROW_NOT_OK( QgisPrivateArrowSchemaSetTypeStruct( out.schema(), fields.count() + hasGeometry ) );
751 for ( int i = 0; i < fields.count(); i++ )
752 {
753 inferField( fields.field( i ), out.schema()->children[i] );
754 }
755
756 if ( hasGeometry )
757 {
758 QString geometryColumnName = options.geometryColumnName();
759 if ( geometryColumnName.isEmpty() )
760 {
761 geometryColumnName = u"geometry"_s;
762 }
763
764 inferGeometry( out.schema()->children[fields.count()], geometryColumnName, crs );
765 out.setGeometryColumnIndex( fields.count() );
766 }
767
768 return out;
769}
@ FlagExportTrianglesAsPolygons
Triangles should be exported as polygon geometries.
Wrapper around an ArrowArrayStream.
QgsArrowArrayStream & operator=(QgsArrowArrayStream &other)=delete
struct ArrowArrayStream * arrayStream()
Access the underlying ArrowArrayStream from C++.
unsigned long long cArrayStreamAddress() const
Returns the address of the underlying ArrowArrayStream for import or export across boundaries.
QgsArrowArrayStream()=default
Construct invalid array stream holder.
void exportToAddress(unsigned long long otherAddress)
Export this array stream to the address of an empty ArrowArrayStream for export across boundaries.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArrayStream.
Wrapper around an ArrowArray.
QgsArrowArray()=default
Construct invalid array holder.
struct ArrowArray * array()
Access the underlying ArrowArray from C++.
QgsArrowArray & operator=(QgsArrowArray &other)=delete
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArray.
void exportToAddress(unsigned long long otherAddress)
Export this array to the address of an empty ArrowArray for export across boundaries.
unsigned long long cArrayAddress() const
Returns the address of the underlying ArrowArray for import or export across boundaries.
Options for inferring an ArrowSchema from a feature source.
void setGeometryColumnName(const QString &geometryColumnName)
Set the name that should be used to refer to the geometry column.
QgsArrowInferSchemaOptions()
Construct default options.
QString geometryColumnName() const
The name that should be used for a layer's geometry column.
static QgsArrowSchema inferSchema(const QgsVectorLayer &layer, const QgsArrowInferSchemaOptions &options=QgsArrowInferSchemaOptions())
Infer the QgsArrowSchema for a given QgsVectorLayer.
struct ArrowSchema * schema()
Access the output ArrowSchema from C++.
QgsArrowArrayStream toArrayStream(int batchSize=65536) const
Export this iterator as an ArrowArrayStream.
QgsArrowArray nextFeatures(int n)
Build an ArrowArray using the next n features (or fewer depending on the number of features remaining...
QgsArrowIterator()=default
Construct invalid iterator.
void setSchema(const QgsArrowSchema &schema)
Set the ArrowSchema for the output of all future batches.
Wrapper around an ArrowSchema.
int geometryColumnIndex() const
Returns the index of the column in this schema that should be populated with a feature geometry.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowSchema.
unsigned long long cSchemaAddress() const
Returns the address of the underlying ArrowSchema for import or export across boundaries.
struct ArrowSchema * schema()
Access the underlying ArrowSchema from C++.
QgsArrowSchema & operator=(const QgsArrowSchema &other)
Assignment operator.
void exportToAddress(unsigned long long otherAddress)
Export this schema to the address of an empty ArrowSchema for export across boundaries.
QgsArrowSchema()
Construct invalid schema holder.
void setGeometryColumnIndex(int geometryColumnIndex)
Set the index of the column in this schema that should be populated with a feature geometry.
Represents a coordinate reference system (CRS).
std::string toJsonString(bool multiline=false, int indentationWidth=4, const QString &schema=QString()) const
Returns a JSON string representation of this CRS.
Defines a QGIS exception class.
QString what() const
Wrapper for iterator of features from vector data provider or vector layer.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
int fieldNameIndex(const QString &fieldName) const
Utility method to get attribute index from name.
int attributeCount() const
Returns the number of attributes attached to the feature.
QgsGeometry geometry
Definition qgsfeature.h:71
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Encapsulate a field in an attribute table or data source.
Definition qgsfield.h:56
QMetaType::Type type
Definition qgsfield.h:63
QString name
Definition qgsfield.h:65
QMetaType::Type subType() const
If the field is a collection, gets its element's type.
Definition qgsfield.cpp:162
Container of fields for a vector layer.
Definition qgsfields.h:46
int count
Definition qgsfields.h:50
QgsField field(int fieldIdx) const
Returns the field at particular index (must be in range 0..N-1).
QByteArray asWkb(QgsAbstractGeometry::WkbFlags flags=QgsAbstractGeometry::WkbFlags()) const
Export the geometry to WKB.
QgsCoordinateReferenceSystem crs
Definition qgsmaplayer.h:90
static bool isNull(const QVariant &variant, bool silenceNullWarnings=false)
Returns true if the specified variant should be considered a NULL value.
virtual QString geometryColumnName() const
Returns the name of the column storing geometry, if applicable.
Represents a vector layer which manages a vector based dataset.
bool isSpatial() const final
Returns true if this is a geometry layer and false in case of NoGeometry (table only) or UnknownGeome...
QgsVectorDataProvider * dataProvider() final
Returns the layer's data provider, it may be nullptr.
#define QGIS_NANOARROW_THROW_NOT_OK_ERR(expr, err)
#define QGIS_NANOARROW_THROW_NOT_OK(expr)