QGIS API Documentation 4.0.0-Norrköping (1ddcee3d0e4)
Loading...
Searching...
No Matches
qgsarrowiterator.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsarrowiterator.cpp
3 ---------------------
4 begin : November 2025
5 copyright : (C) 2025 by Dewey Dunnington
6 email : dewey at dunnington dot ca
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#include "qgsarrowiterator.h"
17
18#include <nlohmann/json.hpp>
19
20#include "nanoarrow/nanoarrow.hpp"
21#include "qgsfeatureiterator.h"
22#include "qgsvectorlayer.h"
23
24#include <QString>
25
26using namespace Qt::StringLiterals;
27
// Evaluates expr exactly once and, when the resulting code is not NANOARROW_OK,
// throws a QgsException carrying both the numeric code and the text found in
// err->message (err is used as a pointer; callers in this file pass a struct ArrowError *).
// The do { } while ( 0 ) wrapper makes the macro behave like a single statement.
28#define QGIS_NANOARROW_THROW_NOT_OK_ERR( expr, err ) \
29 do \
30 { \
31 const int ec = ( expr ); \
32 if ( ec != NANOARROW_OK ) \
33 { \
34 throw QgsException( u"nanoarrow error (%1): %2"_s.arg( ec ).arg( QString::fromUtf8( ( err )->message ) ) ); \
35 } \
36 } while ( 0 )
37
// Evaluates expr exactly once and throws a QgsException containing the numeric code
// when the result is not NANOARROW_OK. Use this variant for calls that do not fill
// in an error message structure.
38#define QGIS_NANOARROW_THROW_NOT_OK( expr ) \
39 do \
40 { \
41 const int ec = ( expr ); \
42 if ( ec != NANOARROW_OK ) \
43 { \
44 throw QgsException( u"nanoarrow error (%1)"_s.arg( ec ) ); \
45 } \
46 } while ( 0 )
47
48
51
53{
// Store the requested name as-is; an empty name is replaced later, when a schema is inferred
54 mGeometryColumnName = geometryColumnName;
55}
56
58{
// Returns the stored name unchanged (it may be empty)
59 return mGeometryColumnName;
60}
61
64
66{
// Deep copy, so this object ends up with a schema that is independent of other's.
// Throws QgsException when nanoarrow reports a failure.
67 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &other.mSchema, &mSchema ) );
// The geometry column index travels with the schema it describes
68 mGeometryColumnIndex = other.mGeometryColumnIndex;
69}
70
72{
73 if ( mSchema.release )
74 {
75 ArrowSchemaRelease( &mSchema );
76 }
77 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &other.mSchema, &mSchema ) );
78 mGeometryColumnIndex = other.mGeometryColumnIndex;
79 return *this;
80}
81
83{
// Only release when a schema is actually held: a null release callback is what
// isValid() in this file treats as "no schema"
84 if ( mSchema.release )
85 {
86 ArrowSchemaRelease( &mSchema );
87 }
88}
89
90struct ArrowSchema *QgsArrowSchema::schema()
91{
92 return &mSchema;
93}
94
95const struct ArrowSchema *QgsArrowSchema::schema() const
96{
97 return &mSchema;
98}
99
100unsigned long long QgsArrowSchema::cSchemaAddress() const
101{
102 // In the event QGIS is built on platform where unsigned long long is insufficient to
103 // represent a uintptr_t, ensure compilation fails
104 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
105
106 return reinterpret_cast<unsigned long long>( &mSchema );
107}
108
109void QgsArrowSchema::exportToAddress( unsigned long long otherAddress )
110{
111 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
112
113 struct ArrowSchema *otherArrowSchema = reinterpret_cast<struct ArrowSchema *>( otherAddress );
114 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaDeepCopy( &mSchema, otherArrowSchema ) );
115}
116
118{
// The pointer converts to bool: true only while a release callback is set
119 return mSchema.release;
120}
121
123{
// Index of the schema column that receives feature geometries
124 return mGeometryColumnIndex;
125}
126
131
133{
// Release anything mArray already holds before it is overwritten
134 if ( mArray.release )
135 {
136 ArrowArrayRelease( &mArray );
137 }
138
// Take over other's array. NOTE(review): per the nanoarrow API, ArrowArrayMove()
// leaves the source marked as released - confirm against the nanoarrow reference.
139 ArrowArrayMove( other.array(), &mArray );
140}
141
143{
144 if ( this != &other )
145 {
146 ArrowArrayMove( other.array(), &mArray );
147 }
148
149 return *this;
150}
151
153{
// Only release when an array is actually held (release callback set)
154 if ( mArray.release )
155 {
156 ArrowArrayRelease( &mArray );
157 }
158}
159
160struct ArrowArray *QgsArrowArray::array()
161{
162 return &mArray;
163}
164
165const struct ArrowArray *QgsArrowArray::array() const
166{
167 return &mArray;
168}
169
170unsigned long long QgsArrowArray::cArrayAddress() const
171{
172 // In the event QGIS is built on platform where unsigned long long is insufficient to
173 // represent a uintptr_t, ensure compilation fails
174 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
175
176 return reinterpret_cast<unsigned long long>( &mArray );
177}
178
179void QgsArrowArray::exportToAddress( unsigned long long otherAddress )
180{
181 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
182
183 struct ArrowArray *otherArrowArray = reinterpret_cast<struct ArrowArray *>( otherAddress );
184 ArrowArrayMove( &mArray, otherArrowArray );
185}
186
188{
// The pointer converts to bool: true only while a release callback is set
189 return mArray.release;
190}
191
193{
// Release anything mArrayStream already holds before it is overwritten
194 if ( mArrayStream.release )
195 {
196 ArrowArrayStreamRelease( &mArrayStream );
197 }
198
// Take over other's stream. NOTE(review): per the nanoarrow API, ArrowArrayStreamMove()
// leaves the source marked as released - confirm against the nanoarrow reference.
199 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
200}
201
203{
204 if ( this != &other )
205 {
206 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
207 }
208
209 return *this;
210}
211
213{
// Only release when a stream is actually held (release callback set)
214 if ( mArrayStream.release )
215 {
216 ArrowArrayStreamRelease( &mArrayStream );
217 }
218}
219
220struct ArrowArrayStream *QgsArrowArrayStream::arrayStream()
221{
222 return &mArrayStream;
223}
224
226{
227 // If QGIS is built on a platform where unsigned long long is too small to
228 // represent a uintptr_t, make sure compilation fails
229 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
230
// The address of the member itself is exposed, converted to an integer
231 return reinterpret_cast<unsigned long long>( &mArrayStream );
232}
233
234void QgsArrowArrayStream::exportToAddress( unsigned long long otherAddress )
235{
236 static_assert( sizeof( unsigned long long ) >= sizeof( uintptr_t ) );
237
238 struct ArrowArrayStream *otherArrowArrayStream = reinterpret_cast<struct ArrowArrayStream *>( otherAddress );
239 ArrowArrayStreamMove( &mArrayStream, otherArrowArrayStream );
240}
241
243{
// The pointer converts to bool: true only while a release callback is set
244 return mArrayStream.release;
245}
246
247namespace
248{
249
250
251 void inferGeometry( struct ArrowSchema *col, const QString &name, const QgsCoordinateReferenceSystem &crs )
252 {
253 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetName( col, name.toUtf8().constData() ) );
254 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BINARY ) );
255
256 std::string crsString = crs.toJsonString();
257 std::string geoArrowMetadata;
258 if ( crsString.empty() )
259 {
260 geoArrowMetadata = "{}";
261 }
262 else
263 {
264 geoArrowMetadata = R"({"crs":)" + crsString + R"(})";
265 }
266
267 nanoarrow::UniqueBuffer metadataKv;
268 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderInit( metadataKv.get(), nullptr ) );
269 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView( "ARROW:extension:name" ), ArrowCharView( "geoarrow.wkb" ) ) );
270 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView( "ARROW:extension:metadata" ), ArrowCharView( geoArrowMetadata.c_str() ) ) );
271 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetMetadata( col, reinterpret_cast<char *>( metadataKv->data ) ) );
272 }
273
274 void appendGeometry( const QgsFeature &feature, struct ArrowArray *col )
275 {
276 if ( !feature.hasGeometry() )
277 {
278 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( col, 1 ) );
279 return;
280 }
281
282 const QByteArray wkb = feature.geometry().asWkb( QgsAbstractGeometry::FlagExportTrianglesAsPolygons );
283 struct ArrowBufferView v;
284 v.data.data = wkb.data();
285 v.size_bytes = static_cast<int64_t>( wkb.size() );
286 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, v ) );
287 }
288
289 void inferMetaType( const QMetaType::Type metaType, struct ArrowSchema *col, const QString &fieldName )
290 {
291 switch ( metaType )
292 {
293 case QMetaType::Bool:
294 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BOOL ) );
295 return;
296 case QMetaType::QChar:
297 case QMetaType::SChar:
298 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT8 ) );
299 return;
300 case QMetaType::UChar:
301 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT8 ) );
302 return;
303 case QMetaType::Short:
304 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT16 ) );
305 return;
306 case QMetaType::UShort:
307 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT16 ) );
308 return;
309 case QMetaType::Int:
310 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT32 ) );
311 return;
312 case QMetaType::UInt:
313 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT32 ) );
314 return;
315 case QMetaType::Long:
316 case QMetaType::LongLong:
317 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_INT64 ) );
318 return;
319 case QMetaType::ULong:
320 case QMetaType::ULongLong:
321 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_UINT64 ) );
322 return;
323 case QMetaType::Float:
324 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_FLOAT ) );
325 return;
326 case QMetaType::Double:
327 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_DOUBLE ) );
328 return;
329 case QMetaType::QString:
330 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_STRING ) );
331 return;
332 case QMetaType::QByteArray:
333 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_BINARY ) );
334 return;
335 case QMetaType::QDate:
336 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_DATE32 ) );
337 return;
338 case QMetaType::QTime:
339 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetTypeDateTime( col, NANOARROW_TYPE_TIME32, NANOARROW_TIME_UNIT_MILLI, nullptr ) );
340 return;
341 case QMetaType::QDateTime:
342 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetTypeDateTime( col, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MILLI, "UTC" ) );
343 return;
344 case QMetaType::QStringList:
345 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_LIST ) );
346 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col->children[0], NANOARROW_TYPE_STRING ) );
347 return;
348 default:
349 throw QgsException( u"QgsArrowIterator can't infer field type '%1' for field '%2'"_s.arg( QMetaType::typeName( metaType ) ).arg( fieldName ) );
350 }
351 }
352
353 void inferField( const QgsField &field, struct ArrowSchema *col )
354 {
355 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetName( col, field.name().toUtf8().constData() ) );
356 switch ( field.type() )
357 {
358 case QMetaType::QVariantList:
359 QGIS_NANOARROW_THROW_NOT_OK( ArrowSchemaSetType( col, NANOARROW_TYPE_LIST ) );
360 inferMetaType( field.subType(), col->children[0], field.name() );
361 break;
362 default:
363 inferMetaType( field.type(), col, field.name() );
364 break;
365 }
366 }
367
368 void appendVariant( const QVariant &v, struct ArrowArray *col, struct ArrowSchemaView &columnTypeView, struct ArrowSchemaView &columnListTypeView )
369 {
370 if ( QgsVariantUtils::isNull( v ) )
371 {
372 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( col, 1 ) );
373 return;
374 }
375
376 switch ( columnTypeView.type )
377 {
378 case NANOARROW_TYPE_BOOL:
379 if ( v.canConvert( QMetaType::Bool ) )
380 {
381 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, v.toBool() ) );
382 return;
383 }
384 break;
385 case NANOARROW_TYPE_UINT8:
386 case NANOARROW_TYPE_UINT16:
387 case NANOARROW_TYPE_UINT32:
388 case NANOARROW_TYPE_UINT64:
389 if ( v.canConvert( QMetaType::ULongLong ) )
390 {
391 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendUInt( col, v.toULongLong() ) );
392 return;
393 }
394 break;
395 case NANOARROW_TYPE_INT8:
396 case NANOARROW_TYPE_INT16:
397 case NANOARROW_TYPE_INT32:
398 case NANOARROW_TYPE_INT64:
399 if ( v.canConvert( QMetaType::LongLong ) )
400 {
401 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, v.toLongLong() ) );
402 return;
403 }
404 break;
405 case NANOARROW_TYPE_HALF_FLOAT:
406 case NANOARROW_TYPE_FLOAT:
407 case NANOARROW_TYPE_DOUBLE:
408 if ( v.canConvert( QMetaType::Double ) )
409 {
410 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendDouble( col, v.toDouble() ) );
411 return;
412 }
413 break;
414 case NANOARROW_TYPE_STRING:
415 case NANOARROW_TYPE_LARGE_STRING:
416 case NANOARROW_TYPE_STRING_VIEW:
417 {
418 if ( v.canConvert( QMetaType::QString ) )
419 {
420 const QByteArray string = v.toString().toUtf8();
421 struct ArrowBufferView bytesView;
422 bytesView.data.data = string.constData();
423 bytesView.size_bytes = static_cast<int64_t>( string.size() );
424 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, bytesView ) );
425 return;
426 }
427 break;
428 }
429
430 case NANOARROW_TYPE_BINARY:
431 case NANOARROW_TYPE_LARGE_BINARY:
432 case NANOARROW_TYPE_BINARY_VIEW:
433 case NANOARROW_TYPE_FIXED_SIZE_BINARY:
434 {
435 if ( v.canConvert( QMetaType::QByteArray ) )
436 {
437 const QByteArray bytes = v.toByteArray();
438 struct ArrowBufferView bytesView;
439 bytesView.data.data = bytes.data();
440 bytesView.size_bytes = static_cast<int64_t>( bytes.size() );
441 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendBytes( col, bytesView ) );
442 return;
443 }
444 break;
445 }
446
447 case NANOARROW_TYPE_DATE32:
448 {
449 if ( v.canConvert( QMetaType::QDate ) )
450 {
451 static QDate epoch = QDate( 1970, 1, 1 );
452 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
453 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, daysSinceEpoch ) );
454 return;
455 }
456 break;
457 }
458
459 case NANOARROW_TYPE_DATE64:
460 {
461 if ( v.canConvert( QMetaType::QDate ) )
462 {
463 static QDate epoch = QDate( 1970, 1, 1 );
464 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
465 int64_t msSinceEpoch = daysSinceEpoch * 24 * 60 * 60 * 1000;
466 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, msSinceEpoch ) );
467 return;
468 }
469 break;
470 }
471
472 case NANOARROW_TYPE_TIMESTAMP:
473 {
474 if ( v.canConvert( QMetaType::QDateTime ) )
475 {
476 const QDateTime dateTime = v.toDateTime().toUTC();
477 switch ( columnTypeView.time_unit )
478 {
479 case NANOARROW_TIME_UNIT_SECOND:
480 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toSecsSinceEpoch() ) );
481 return;
482 case NANOARROW_TIME_UNIT_MILLI:
483 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() ) );
484 return;
485 case NANOARROW_TIME_UNIT_MICRO:
486 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() * 1000 ) );
487 return;
488 case NANOARROW_TIME_UNIT_NANO:
489 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, dateTime.toMSecsSinceEpoch() * 1000 * 1000 ) );
490 return;
491 }
492 }
493
494 break;
495 }
496 case NANOARROW_TYPE_TIME32:
497 case NANOARROW_TYPE_TIME64:
498 {
499 if ( v.canConvert( QMetaType::QTime ) )
500 {
501 const QTime time = v.toTime();
502 switch ( columnTypeView.time_unit )
503 {
504 case NANOARROW_TIME_UNIT_SECOND:
505 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, time.msecsSinceStartOfDay() / 1000 ) );
506 return;
507 case NANOARROW_TIME_UNIT_MILLI:
508 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, time.msecsSinceStartOfDay() ) );
509 return;
510 case NANOARROW_TIME_UNIT_MICRO:
511 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, static_cast<int64_t>( time.msecsSinceStartOfDay() ) * 1000 ) );
512 return;
513 case NANOARROW_TIME_UNIT_NANO:
514 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendInt( col, static_cast<int64_t>( time.msecsSinceStartOfDay() ) * 1000 * 1000 ) );
515 return;
516 }
517 }
518
519 break;
520 }
521
522 case NANOARROW_TYPE_LIST:
523 case NANOARROW_TYPE_FIXED_SIZE_LIST:
524 case NANOARROW_TYPE_LARGE_LIST:
525 case NANOARROW_TYPE_LIST_VIEW:
526 case NANOARROW_TYPE_LARGE_LIST_VIEW:
527 {
528 if ( v.canConvert( QMetaType::QVariantList ) )
529 {
530 const QVariantList variantList = v.toList();
531 struct ArrowSchemaView dummyListType {};
532 for ( const QVariant &item : variantList )
533 {
534 appendVariant( item, col->children[0], columnListTypeView, dummyListType );
535 }
536
537 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayFinishElement( col ) );
538 return;
539 }
540 break;
541 }
542 default:
543 break;
544 }
545
546 throw QgsException( u"Can't convert variant of type '%1' to Arrow type '%2'"_s.arg( v.typeName() ).arg( ArrowTypeString( columnTypeView.type ) ) );
547 }
548
549 class ArrowIteratorArrayStreamImpl
550 {
551 public:
552 ArrowIteratorArrayStreamImpl( QgsArrowIterator iterator, int batchSize )
553 : mIterator( iterator )
554 , mBatchSize( batchSize )
555 {}
556
557 int GetSchema( struct ArrowSchema *schema )
558 {
559 NANOARROW_RETURN_NOT_OK( ArrowSchemaDeepCopy( mIterator.schema(), schema ) );
560 return NANOARROW_OK;
561 }
562
563 int GetNext( struct ArrowArray *array )
564 {
565 try
566 {
567 QgsArrowArray batch = mIterator.nextFeatures( mBatchSize );
568 ArrowArrayMove( batch.array(), array );
569 return NANOARROW_OK;
570 }
571 catch ( QgsException &e )
572 {
573 mLastError = e.what().toStdString();
574 return EINVAL;
575 }
576 catch ( std::exception &e )
577 {
578 mLastError = e.what();
579 return EINVAL;
580 }
581 catch ( ... )
582 {
583 mLastError = "unknown error";
584 return EINVAL;
585 }
586 }
587
588 const char *GetLastError() const { return mLastError.c_str(); }
589
590 private:
591 QgsArrowIterator mIterator;
592 int mBatchSize { 65536 };
593 std::string mLastError {};
594 };
595
596} //namespace
597
// Keeps the supplied feature iterator as the source for nextFeatures(); no schema is set here
599 : mFeatureIterator( featureIterator )
600{}
601
602struct ArrowSchema *QgsArrowIterator::schema()
603{
604 return mSchema.schema();
605}
606
608{
// Reject wrappers that hold no schema before anything is changed
609 if ( !schema.isValid() )
610 {
611 throw QgsException( u"Invalid or null ArrowSchema provided"_s );
612 }
613
// Copy assignment: the iterator keeps its own copy of the schema
614 mSchema = schema;
615}
616
618{
// The stream implementation receives a copy of this iterator (*this) and the batch size.
// NOTE(review): the raw new presumably hands ownership to the nanoarrow array stream
// factory, which deletes it when the stream is released - confirm against nanoarrow.hpp.
620 nanoarrow::ArrayStreamFactory<ArrowIteratorArrayStreamImpl>::InitArrayStream( new ArrowIteratorArrayStreamImpl( *this, batchSize ), out.arrayStream() );
621 return out;
622}
623
624
626{
// Builds one Arrow struct array from up to n features pulled from mFeatureIterator,
// with one child column per child of mSchema. Throws QgsException when n < 1, when no
// schema is set, when the schema is not a struct, or when any nanoarrow call fails.
627 if ( n < 1 )
628 {
629 throw QgsException( u"QgsArrowIterator can't iterate over less than one feature"_s );
630 }
631
632 if ( !mSchema.isValid() )
633 {
634 throw QgsException( u"QgsArrowIterator schema not set"_s );
635 }
636
637 // Check the schema and cache a few things about it before we loop over features.
638 // This could also be done when setting the schema (although the struct ArrowSchemaView
639 // would have to be opaque in the header if this were cached as a class member).
640 const struct ArrowSchema *schema = mSchema.schema();
641
642 struct ArrowError error {};
643
644 // Check that the top-level schema is a struct
645 struct ArrowSchemaView schemaView;
646 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &schemaView, schema, &error ), &error );
647 if ( schemaView.type != NANOARROW_TYPE_STRUCT )
648 {
649 throw QgsException( u"QgsArrowIterator expected requested schema as struct but got '%1'"_s.arg( ArrowTypeString( schemaView.type ) ) );
650 }
651
// Per-column caches, all indexed like schema->children
652 std::vector<QString> columnNames( schema->n_children );
653 std::vector<struct ArrowSchemaView> colTypeViews( schema->n_children );
654 std::vector<struct ArrowSchemaView> colListTypeViews( schema->n_children );
655 for ( int64_t i = 0; i < schema->n_children; i++ )
656 {
657 // Parse the column schema
// A child without a name is recorded with an empty column name
658 columnNames[i] = QString( schema->children[i]->name != nullptr ? schema->children[i]->name : QString() );
659 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &colTypeViews[i], schema->children[i], &error ), &error );
660
661 // Parse the column list type if applicable
662 switch ( colTypeViews[i].type )
663 {
664 case NANOARROW_TYPE_LIST:
665 case NANOARROW_TYPE_FIXED_SIZE_LIST:
666 case NANOARROW_TYPE_LARGE_LIST:
667 case NANOARROW_TYPE_LIST_VIEW:
668 case NANOARROW_TYPE_LARGE_LIST_VIEW:
669 {
670 struct ArrowSchemaView childView;
671 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowSchemaViewInit( &childView, schema->children[i]->children[0], &error ), &error );
672 colListTypeViews[i] = std::move( childView );
673 break;
674 }
675 default:
// Non-list columns get an empty view; appendVariant() only reads it for list types
676 colListTypeViews[i] = ArrowSchemaView {};
677 break;
678 }
679 }
680
681 // Create the output array
682 nanoarrow::UniqueArray tmp;
683 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowArrayInitFromSchema( tmp.get(), schema, &error ), &error );
684 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayReserve( tmp.get(), n ) );
685 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayStartAppending( tmp.get() ) );
686
687 // Loop features
688 std::vector<int> featureAttributeIndex;
689 QgsFeature feature;
// Stops after n features or as soon as the feature iterator has nothing more to give
690 while ( n > 0 && mFeatureIterator.nextFeature( feature ) )
691 {
692 --n;
693
694 // Cache the attribute index per output schema index on the first feature
695 if ( featureAttributeIndex.empty() )
696 {
697 for ( int64_t i = 0; i < schema->n_children; i++ )
698 {
699 featureAttributeIndex.push_back( feature.fieldNameIndex( columnNames[i] ) );
700 }
701 }
702
703 // Loop over the output schema fields and append the appropriate attribute from the
704 // feature (or geometry, or null if the feature does not contain that field).
705 for ( int64_t i = 0; i < schema->n_children; i++ )
706 {
707 int attributeIndex = featureAttributeIndex[i];
708 struct ArrowArray *columnArray = tmp->children[i];
709
// The geometry column takes precedence over any attribute that shares its name
710 if ( i == mSchema.geometryColumnIndex() )
711 {
712 appendGeometry( feature, columnArray );
713 }
// NOTE(review): fieldNameIndex() presumably returns a negative value for an unknown
// field name; such columns (and out-of-range indexes) end up in the null branch below
714 else if ( attributeIndex >= 0 && attributeIndex < feature.attributeCount() )
715 {
716 appendVariant( feature.attribute( attributeIndex ), columnArray, colTypeViews[i], colListTypeViews[i] );
717 }
718 else
719 {
720 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayAppendNull( columnArray, 1 ) );
721 }
722 }
723
// Close the struct row once every child column has received its value
724 QGIS_NANOARROW_THROW_NOT_OK( ArrowArrayFinishElement( tmp.get() ) );
725 }
726
727 QGIS_NANOARROW_THROW_NOT_OK_ERR( ArrowArrayFinishBuildingDefault( tmp.get(), &error ), &error );
728
// When no feature was appended nothing is moved, so the returned wrapper stays in its
// default-constructed state
729 QgsArrowArray out;
730 if ( tmp->length > 0 )
731 {
732 ArrowArrayMove( tmp.get(), out.array() );
733 }
734 return out;
735}
736
738{
739 bool layerHasGeometry = layer.isSpatial();
740 if ( layerHasGeometry && options.geometryColumnName().isEmpty() )
741 {
742 QgsArrowInferSchemaOptions optionsClone( options );
743 optionsClone.setGeometryColumnName( layer.dataProvider()->geometryColumnName() );
744 return inferSchema( layer.fields(), layerHasGeometry, layer.crs(), optionsClone );
745 }
746 else
747 {
748 return inferSchema( layer.fields(), layerHasGeometry, layer.crs(), options );
749 }
750}
751
752
754{
// Builds a struct schema with one child per field, followed by one geometry child when
// hasGeometry is true. Throws QgsException for unsupported field types or nanoarrow failures.
755 QgsArrowSchema out;
756 QgisPrivateArrowSchemaInit( out.schema() );
// hasGeometry contributes 0 or 1 to the number of children
757 QGIS_NANOARROW_THROW_NOT_OK( QgisPrivateArrowSchemaSetTypeStruct( out.schema(), fields.count() + hasGeometry ) );
758 for ( int i = 0; i < fields.count(); i++ )
759 {
760 inferField( fields.field( i ), out.schema()->children[i] );
761 }
762
763 if ( hasGeometry )
764 {
// Fall back to "geometry" when the options do not name the geometry column
765 QString geometryColumnName = options.geometryColumnName();
766 if ( geometryColumnName.isEmpty() )
767 {
768 geometryColumnName = u"geometry"_s;
769 }
770
// The geometry column is always the last child, directly after the attribute columns
771 inferGeometry( out.schema()->children[fields.count()], geometryColumnName, crs );
772 out.setGeometryColumnIndex( fields.count() );
773 }
774
775 return out;
776}
@ FlagExportTrianglesAsPolygons
Triangles should be exported as polygon geometries.
Wrapper around an ArrowArrayStream.
QgsArrowArrayStream & operator=(QgsArrowArrayStream &other)=delete
struct ArrowArrayStream * arrayStream()
Access the underlying ArrowArrayStream from C++.
unsigned long long cArrayStreamAddress() const
Returns the address of the underlying ArrowArrayStream for import or export across boundaries.
QgsArrowArrayStream()=default
Construct invalid array stream holder.
void exportToAddress(unsigned long long otherAddress)
Export this array stream to the address of an empty ArrowArrayStream for export across boundaries.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArrayStream.
Wrapper around an ArrowArray.
QgsArrowArray()=default
Construct invalid array holder.
struct ArrowArray * array()
Access the underlying ArrowArray from C++.
QgsArrowArray & operator=(QgsArrowArray &other)=delete
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArray.
void exportToAddress(unsigned long long otherAddress)
Export this array to the address of an empty ArrowArray for export across boundaries.
unsigned long long cArrayAddress() const
Returns the address of the underlying ArrowArray for import or export across boundaries.
Options for inferring an ArrowSchema from a feature source.
void setGeometryColumnName(const QString &geometryColumnName)
Set the name that should be used to refer to the geometry column.
QgsArrowInferSchemaOptions()
Construct default options.
QString geometryColumnName() const
The name that should be used for a layer's geometry column.
static QgsArrowSchema inferSchema(const QgsVectorLayer &layer, const QgsArrowInferSchemaOptions &options=QgsArrowInferSchemaOptions())
Infer the QgsArrowSchema for a given QgsVectorLayer.
struct ArrowSchema * schema()
Access the output ArrowSchema from C++.
QgsArrowArrayStream toArrayStream(int batchSize=65536) const
Export this iterator as an ArrowArrayStream.
QgsArrowArray nextFeatures(int n)
Build an ArrowArray using the next n features (or fewer depending on the number of features remaining...
QgsArrowIterator()=default
Construct invalid iterator.
void setSchema(const QgsArrowSchema &schema)
Set the ArrowSchema for the output of all future batches.
Wrapper around an ArrowSchema.
int geometryColumnIndex() const
Returns the index of the column in this schema that should be populated with a feature geometry.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowSchema.
unsigned long long cSchemaAddress() const
Returns the address of the underlying ArrowSchema for import or export across boundaries.
struct ArrowSchema * schema()
Access the underlying ArrowSchema from C++.
QgsArrowSchema & operator=(const QgsArrowSchema &other)
Assignment operator.
void exportToAddress(unsigned long long otherAddress)
Export this schema to the address of an empty ArrowSchema for export across boundaries.
QgsArrowSchema()
Construct invalid schema holder.
void setGeometryColumnIndex(int geometryColumnIndex)
Set the index of the column in this schema that should be populated with a feature geometry.
Represents a coordinate reference system (CRS).
std::string toJsonString(bool multiline=false, int indentationWidth=4, const QString &schema=QString()) const
Returns a JSON string representation of this CRS.
Defines a QGIS exception class.
QString what() const
Wrapper for iterator of features from vector data provider or vector layer.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition qgsfeature.h:60
int fieldNameIndex(const QString &fieldName) const
Utility method to get attribute index from name.
int attributeCount() const
Returns the number of attributes attached to the feature.
QgsGeometry geometry
Definition qgsfeature.h:71
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Encapsulate a field in an attribute table or data source.
Definition qgsfield.h:56
QMetaType::Type type
Definition qgsfield.h:63
QString name
Definition qgsfield.h:65
QMetaType::Type subType() const
If the field is a collection, gets its element's type.
Definition qgsfield.cpp:153
Container of fields for a vector layer.
Definition qgsfields.h:46
int count
Definition qgsfields.h:50
QgsField field(int fieldIdx) const
Returns the field at particular index (must be in range 0..N-1).
QByteArray asWkb(QgsAbstractGeometry::WkbFlags flags=QgsAbstractGeometry::WkbFlags()) const
Export the geometry to WKB.
QgsCoordinateReferenceSystem crs
Definition qgsmaplayer.h:90
static bool isNull(const QVariant &variant, bool silenceNullWarnings=false)
Returns true if the specified variant should be considered a NULL value.
virtual QString geometryColumnName() const
Returns the name of the column storing geometry, if applicable.
Represents a vector layer which manages a vector based dataset.
bool isSpatial() const final
Returns true if this is a geometry layer and false in case of NoGeometry (table only) or UnknownGeome...
QgsVectorDataProvider * dataProvider() final
Returns the layer's data provider, it may be nullptr.
#define QGIS_NANOARROW_THROW_NOT_OK_ERR(expr, err)
#define QGIS_NANOARROW_THROW_NOT_OK(expr)