18#include <nlohmann/json.hpp>
20#include "nanoarrow/nanoarrow.hpp"
26using namespace Qt::StringLiterals;
28#define QGIS_NANOARROW_THROW_NOT_OK_ERR( expr, err ) \
31 const int ec = ( expr ); \
32 if ( ec != NANOARROW_OK ) \
34 throw QgsException( u"nanoarrow error (%1): %2"_s.arg( ec ).arg( QString::fromUtf8( ( err )->message ) ) ); \
38#define QGIS_NANOARROW_THROW_NOT_OK( expr ) \
41 const int ec = ( expr ); \
42 if ( ec != NANOARROW_OK ) \
44 throw QgsException( u"nanoarrow error (%1)"_s.arg( ec ) ); \
59 return mGeometryColumnName;
68 mGeometryColumnIndex = other.mGeometryColumnIndex;
73 if ( mSchema.release )
75 ArrowSchemaRelease( &mSchema );
78 mGeometryColumnIndex = other.mGeometryColumnIndex;
84 if ( mSchema.release )
86 ArrowSchemaRelease( &mSchema );
104 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
106 return reinterpret_cast<unsigned long long>( &mSchema );
111 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
113 struct ArrowSchema *otherArrowSchema =
reinterpret_cast<struct ArrowSchema *
>( otherAddress );
119 return mSchema.release;
128 if ( mArray.release )
130 ArrowArrayRelease( &mArray );
133 ArrowArrayMove( other.array(), &mArray );
138 if (
this != &other )
140 ArrowArrayMove( other.array(), &mArray );
148 if ( mArray.release )
150 ArrowArrayRelease( &mArray );
168 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
170 return reinterpret_cast<unsigned long long>( &mArray );
175 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
177 struct ArrowArray *otherArrowArray =
reinterpret_cast<struct ArrowArray *
>( otherAddress );
178 ArrowArrayMove( &mArray, otherArrowArray );
183 return mArray.release;
188 if ( mArrayStream.release )
190 ArrowArrayStreamRelease( &mArrayStream );
193 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
198 if (
this != &other )
200 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
208 if ( mArrayStream.release )
210 ArrowArrayStreamRelease( &mArrayStream );
216 return &mArrayStream;
223 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
225 return reinterpret_cast<unsigned long long>( &mArrayStream );
230 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
232 struct ArrowArrayStream *otherArrowArrayStream =
reinterpret_cast<struct ArrowArrayStream *
>( otherAddress );
233 ArrowArrayStreamMove( &mArrayStream, otherArrowArrayStream );
238 return mArrayStream.release;
251 std::string geoArrowMetadata;
252 if ( crsString.empty() )
254 geoArrowMetadata =
"{}";
258 geoArrowMetadata = R
"({"crs":)" + crsString + R"(})";
261 nanoarrow::UniqueBuffer metadataKv;
263 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView(
"ARROW:extension:name" ), ArrowCharView(
"geoarrow.wkb" ) ) );
264 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView(
"ARROW:extension:metadata" ), ArrowCharView( geoArrowMetadata.c_str() ) ) );
268 void appendGeometry(
const QgsFeature &feature,
struct ArrowArray *col )
277 struct ArrowBufferView v;
278 v.data.data = wkb.data();
279 v.size_bytes =
static_cast<int64_t
>( wkb.size() );
283 void inferMetaType(
const QMetaType::Type metaType,
struct ArrowSchema *col,
const QString &fieldName )
287 case QMetaType::Bool:
290 case QMetaType::QChar:
291 case QMetaType::SChar:
294 case QMetaType::UChar:
297 case QMetaType::Short:
300 case QMetaType::UShort:
306 case QMetaType::UInt:
309 case QMetaType::Long:
310 case QMetaType::LongLong:
313 case QMetaType::ULong:
314 case QMetaType::ULongLong:
317 case QMetaType::Float:
320 case QMetaType::Double:
323 case QMetaType::QString:
326 case QMetaType::QByteArray:
329 case QMetaType::QDate:
332 case QMetaType::QTime:
335 case QMetaType::QDateTime:
338 case QMetaType::QStringList:
343 throw QgsException( u
"QgsArrowIterator can't infer field type '%1' for field '%2'"_s.arg( QMetaType::typeName( metaType ) ).arg( fieldName ) );
347 void inferField(
const QgsField &field,
struct ArrowSchema *col )
350 switch ( field.
type() )
352 case QMetaType::QVariantList:
354 inferMetaType( field.
subType(), col->children[0], field.
name() );
357 inferMetaType( field.
type(), col, field.
name() );
362 void appendVariant(
const QVariant &v,
struct ArrowArray *col,
struct ArrowSchemaView &columnTypeView,
struct ArrowSchemaView &columnListTypeView )
370 switch ( columnTypeView.type )
372 case NANOARROW_TYPE_BOOL:
373 if ( v.canConvert( QMetaType::Bool ) )
379 case NANOARROW_TYPE_UINT8:
380 case NANOARROW_TYPE_UINT16:
381 case NANOARROW_TYPE_UINT32:
382 case NANOARROW_TYPE_UINT64:
383 if ( v.canConvert( QMetaType::ULongLong ) )
389 case NANOARROW_TYPE_INT8:
390 case NANOARROW_TYPE_INT16:
391 case NANOARROW_TYPE_INT32:
392 case NANOARROW_TYPE_INT64:
393 if ( v.canConvert( QMetaType::LongLong ) )
399 case NANOARROW_TYPE_HALF_FLOAT:
400 case NANOARROW_TYPE_FLOAT:
401 case NANOARROW_TYPE_DOUBLE:
402 if ( v.canConvert( QMetaType::Double ) )
408 case NANOARROW_TYPE_STRING:
409 case NANOARROW_TYPE_LARGE_STRING:
410 case NANOARROW_TYPE_STRING_VIEW:
412 if ( v.canConvert( QMetaType::QString ) )
414 const QByteArray
string = v.toString().toUtf8();
415 struct ArrowBufferView bytesView;
416 bytesView.data.data =
string.constData();
417 bytesView.size_bytes =
static_cast<int64_t
>(
string.size() );
424 case NANOARROW_TYPE_BINARY:
425 case NANOARROW_TYPE_LARGE_BINARY:
426 case NANOARROW_TYPE_BINARY_VIEW:
427 case NANOARROW_TYPE_FIXED_SIZE_BINARY:
429 if ( v.canConvert( QMetaType::QByteArray ) )
431 const QByteArray bytes = v.toByteArray();
432 struct ArrowBufferView bytesView;
433 bytesView.data.data = bytes.data();
434 bytesView.size_bytes =
static_cast<int64_t
>( bytes.size() );
441 case NANOARROW_TYPE_DATE32:
443 if ( v.canConvert( QMetaType::QDate ) )
445 static QDate epoch = QDate( 1970, 1, 1 );
446 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
453 case NANOARROW_TYPE_DATE64:
455 if ( v.canConvert( QMetaType::QDate ) )
457 static QDate epoch = QDate( 1970, 1, 1 );
458 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
459 int64_t msSinceEpoch = daysSinceEpoch * 24 * 60 * 60 * 1000;
466 case NANOARROW_TYPE_TIMESTAMP:
468 if ( v.canConvert( QMetaType::QDateTime ) )
470 const QDateTime dateTime = v.toDateTime().toUTC();
471 switch ( columnTypeView.time_unit )
473 case NANOARROW_TIME_UNIT_SECOND:
476 case NANOARROW_TIME_UNIT_MILLI:
479 case NANOARROW_TIME_UNIT_MICRO:
482 case NANOARROW_TIME_UNIT_NANO:
490 case NANOARROW_TYPE_TIME32:
491 case NANOARROW_TYPE_TIME64:
493 if ( v.canConvert( QMetaType::QTime ) )
495 const QTime time = v.toTime();
496 switch ( columnTypeView.time_unit )
498 case NANOARROW_TIME_UNIT_SECOND:
501 case NANOARROW_TIME_UNIT_MILLI:
504 case NANOARROW_TIME_UNIT_MICRO:
507 case NANOARROW_TIME_UNIT_NANO:
516 case NANOARROW_TYPE_LIST:
517 case NANOARROW_TYPE_FIXED_SIZE_LIST:
518 case NANOARROW_TYPE_LARGE_LIST:
519 case NANOARROW_TYPE_LIST_VIEW:
520 case NANOARROW_TYPE_LARGE_LIST_VIEW:
522 if ( v.canConvert( QMetaType::QVariantList ) )
524 const QVariantList variantList = v.toList();
525 struct ArrowSchemaView dummyListType {};
526 for (
const QVariant &item : variantList )
528 appendVariant( item, col->children[0], columnListTypeView, dummyListType );
540 throw QgsException( u
"Can't convert variant of type '%1' to Arrow type '%2'"_s.arg( v.typeName() ).arg( ArrowTypeString( columnTypeView.type ) ) );
543 class ArrowIteratorArrayStreamImpl
546 ArrowIteratorArrayStreamImpl( QgsArrowIterator iterator,
int batchSize )
547 : mIterator( iterator ), mBatchSize( batchSize ) {}
549 int GetSchema(
struct ArrowSchema *schema )
551 NANOARROW_RETURN_NOT_OK( ArrowSchemaDeepCopy( mIterator.schema(), schema ) );
555 int GetNext(
struct ArrowArray *array )
559 QgsArrowArray batch = mIterator.nextFeatures( mBatchSize );
560 ArrowArrayMove( batch.
array(), array );
563 catch ( QgsException &e )
565 mLastError = e.
what().toStdString();
568 catch ( std::exception &e )
570 mLastError = e.what();
575 mLastError =
"unknown error";
580 const char *GetLastError()
const {
return mLastError.c_str(); }
583 QgsArrowIterator mIterator;
584 int mBatchSize { 65536 };
585 std::string mLastError {};
591 : mFeatureIterator( featureIterator )
597 return mSchema.schema();
604 throw QgsException( u
"Invalid or null ArrowSchema provided"_s );
613 nanoarrow::ArrayStreamFactory<ArrowIteratorArrayStreamImpl>::InitArrayStream(
new ArrowIteratorArrayStreamImpl( *
this, batchSize ), out.
arrayStream() );
622 throw QgsException( u
"QgsArrowIterator can't iterate over less than one feature"_s );
625 if ( !mSchema.isValid() )
627 throw QgsException( u
"QgsArrowIterator schema not set"_s );
633 const struct ArrowSchema *
schema = mSchema.schema();
635 struct ArrowError error {};
638 struct ArrowSchemaView schemaView;
640 if ( schemaView.type != NANOARROW_TYPE_STRUCT )
642 throw QgsException( u
"QgsArrowIterator expected requested schema as struct but got '%1'"_s.arg( ArrowTypeString( schemaView.type ) ) );
645 std::vector<QString> columnNames(
schema->n_children );
646 std::vector<struct ArrowSchemaView> colTypeViews(
schema->n_children );
647 std::vector<struct ArrowSchemaView> colListTypeViews(
schema->n_children );
648 for ( int64_t i = 0; i <
schema->n_children; i++ )
651 columnNames[i] = QString(
schema->children[i]->name !=
nullptr ?
schema->children[i]->name : QString() );
655 switch ( colTypeViews[i].type )
657 case NANOARROW_TYPE_LIST:
658 case NANOARROW_TYPE_FIXED_SIZE_LIST:
659 case NANOARROW_TYPE_LARGE_LIST:
660 case NANOARROW_TYPE_LIST_VIEW:
661 case NANOARROW_TYPE_LARGE_LIST_VIEW:
663 struct ArrowSchemaView childView;
665 colListTypeViews[i] = std::move( childView );
669 colListTypeViews[i] = ArrowSchemaView {};
675 nanoarrow::UniqueArray tmp;
681 std::vector<int> featureAttributeIndex;
683 while ( n > 0 && mFeatureIterator.nextFeature( feature ) )
688 if ( featureAttributeIndex.empty() )
690 for ( int64_t i = 0; i <
schema->n_children; i++ )
692 featureAttributeIndex.push_back( feature.
fieldNameIndex( columnNames[i] ) );
698 for ( int64_t i = 0; i <
schema->n_children; i++ )
700 int attributeIndex = featureAttributeIndex[i];
701 struct ArrowArray *columnArray = tmp->children[i];
703 if ( i == mSchema.geometryColumnIndex() )
705 appendGeometry( feature, columnArray );
707 else if ( attributeIndex >= 0 && attributeIndex < feature.
attributeCount() )
709 appendVariant( feature.
attribute( attributeIndex ), columnArray, colTypeViews[i], colListTypeViews[i] );
723 if ( tmp->length > 0 )
725 ArrowArrayMove( tmp.get(), out.
array() );
732 bool layerHasGeometry = layer.
isSpatial();
749 QgisPrivateArrowSchemaInit( out.
schema() );
751 for (
int i = 0; i < fields.
count(); i++ )
753 inferField( fields.
field( i ), out.
schema()->children[i] );
759 if ( geometryColumnName.isEmpty() )
761 geometryColumnName = u
"geometry"_s;
764 inferGeometry( out.
schema()->children[fields.
count()], geometryColumnName, crs );
@ FlagExportTrianglesAsPolygons
Triangles should be exported as polygon geometries.
Wrapper around an ArrowArrayStream.
QgsArrowArrayStream & operator=(QgsArrowArrayStream &other)=delete
struct ArrowArrayStream * arrayStream()
Access the underlying ArrowArrayStream from C++.
unsigned long long cArrayStreamAddress() const
Returns the address of the underlying ArrowArrayStream for import or export across boundaries.
QgsArrowArrayStream()=default
Construct invalid array stream holder.
void exportToAddress(unsigned long long otherAddress)
Export this array stream to the address of an empty ArrowArrayStream for export across boundaries.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArrayStream.
Wrapper around an ArrowArray.
QgsArrowArray()=default
Construct invalid array holder.
struct ArrowArray * array()
Access the underlying ArrowArray from C++.
QgsArrowArray & operator=(QgsArrowArray &other)=delete
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArray.
void exportToAddress(unsigned long long otherAddress)
Export this array to the address of an empty ArrowArray for export across boundaries.
unsigned long long cArrayAddress() const
Returns the address of the underlying ArrowArray for import or export across boundaries.
Options for inferring an ArrowSchema from a feature source.
void setGeometryColumnName(const QString &geometryColumnName)
Set the name that should be used to refer to the geometry column.
QgsArrowInferSchemaOptions()
Construct default options.
QString geometryColumnName() const
The name that should be used for a layer's geometry column.
static QgsArrowSchema inferSchema(const QgsVectorLayer &layer, const QgsArrowInferSchemaOptions &options=QgsArrowInferSchemaOptions())
Infer the QgsArrowSchema for a given QgsVectorLayer.
struct ArrowSchema * schema()
Access the output ArrowSchema from C++.
QgsArrowArrayStream toArrayStream(int batchSize=65536) const
Export this iterator as an ArrowArrayStream.
QgsArrowArray nextFeatures(int n)
Build an ArrowArray using the next n features (or fewer depending on the number of features remaining...
QgsArrowIterator()=default
Construct invalid iterator.
void setSchema(const QgsArrowSchema &schema)
Set the ArrowSchema for the output of all future batches.
Wrapper around an ArrowSchema.
int geometryColumnIndex() const
Returns the index of the column in this schema that should be populated with a feature geometry.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowSchema.
unsigned long long cSchemaAddress() const
Returns the address of the underlying ArrowSchema for import or export across boundaries.
struct ArrowSchema * schema()
Access the underlying ArrowSchema from C++.
QgsArrowSchema & operator=(const QgsArrowSchema &other)
Assignment operator.
void exportToAddress(unsigned long long otherAddress)
Export this schema to the address of an empty ArrowSchema for export across boundaries.
QgsArrowSchema()
Construct invalid schema holder.
void setGeometryColumnIndex(int geometryColumnIndex)
Set the index of the column in this schema that should be populated with a feature geometry.
Represents a coordinate reference system (CRS).
std::string toJsonString(bool multiline=false, int indentationWidth=4, const QString &schema=QString()) const
Returns a JSON string representation of this CRS.
Defines a QGIS exception class.
Wrapper for iterator of features from vector data provider or vector layer.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
int fieldNameIndex(const QString &fieldName) const
Utility method to get attribute index from name.
int attributeCount() const
Returns the number of attributes attached to the feature.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Encapsulate a field in an attribute table or data source.
QMetaType::Type subType() const
If the field is a collection, gets its element's type.
Container of fields for a vector layer.
QgsField field(int fieldIdx) const
Returns the field at particular index (must be in range 0..N-1).
QByteArray asWkb(QgsAbstractGeometry::WkbFlags flags=QgsAbstractGeometry::WkbFlags()) const
Export the geometry to WKB.
QgsCoordinateReferenceSystem crs
static bool isNull(const QVariant &variant, bool silenceNullWarnings=false)
Returns true if the specified variant should be considered a NULL value.
virtual QString geometryColumnName() const
Returns the name of the column storing geometry, if applicable.
Represents a vector layer which manages a vector based dataset.
bool isSpatial() const final
Returns true if this is a geometry layer and false in case of NoGeometry (table only) or UnknownGeome...
QgsVectorDataProvider * dataProvider() final
Returns the layer's data provider, it may be nullptr.
#define QGIS_NANOARROW_THROW_NOT_OK_ERR(expr, err)
#define QGIS_NANOARROW_THROW_NOT_OK(expr)