18#include <nlohmann/json.hpp>
20#include "nanoarrow/nanoarrow.hpp"
26using namespace Qt::StringLiterals;
28#define QGIS_NANOARROW_THROW_NOT_OK_ERR( expr, err ) \
31 const int ec = ( expr ); \
32 if ( ec != NANOARROW_OK ) \
34 throw QgsException( u"nanoarrow error (%1): %2"_s.arg( ec ).arg( QString::fromUtf8( ( err )->message ) ) ); \
38#define QGIS_NANOARROW_THROW_NOT_OK( expr ) \
41 const int ec = ( expr ); \
42 if ( ec != NANOARROW_OK ) \
44 throw QgsException( u"nanoarrow error (%1)"_s.arg( ec ) ); \
59 return mGeometryColumnName;
68 mGeometryColumnIndex = other.mGeometryColumnIndex;
73 if ( mSchema.release )
75 ArrowSchemaRelease( &mSchema );
78 mGeometryColumnIndex = other.mGeometryColumnIndex;
84 if ( mSchema.release )
86 ArrowSchemaRelease( &mSchema );
104 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
106 return reinterpret_cast<unsigned long long>( &mSchema );
111 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
113 struct ArrowSchema *otherArrowSchema =
reinterpret_cast<struct ArrowSchema *
>( otherAddress );
119 return mSchema.release;
124 return mGeometryColumnIndex;
134 if ( mArray.release )
136 ArrowArrayRelease( &mArray );
139 ArrowArrayMove( other.array(), &mArray );
144 if (
this != &other )
146 ArrowArrayMove( other.array(), &mArray );
154 if ( mArray.release )
156 ArrowArrayRelease( &mArray );
174 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
176 return reinterpret_cast<unsigned long long>( &mArray );
181 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
183 struct ArrowArray *otherArrowArray =
reinterpret_cast<struct ArrowArray *
>( otherAddress );
184 ArrowArrayMove( &mArray, otherArrowArray );
189 return mArray.release;
194 if ( mArrayStream.release )
196 ArrowArrayStreamRelease( &mArrayStream );
199 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
204 if (
this != &other )
206 ArrowArrayStreamMove( other.arrayStream(), &mArrayStream );
214 if ( mArrayStream.release )
216 ArrowArrayStreamRelease( &mArrayStream );
222 return &mArrayStream;
229 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
231 return reinterpret_cast<unsigned long long>( &mArrayStream );
236 static_assert(
sizeof(
unsigned long long ) >=
sizeof( uintptr_t ) );
238 struct ArrowArrayStream *otherArrowArrayStream =
reinterpret_cast<struct ArrowArrayStream *
>( otherAddress );
239 ArrowArrayStreamMove( &mArrayStream, otherArrowArrayStream );
244 return mArrayStream.release;
257 std::string geoArrowMetadata;
258 if ( crsString.empty() )
260 geoArrowMetadata =
"{}";
264 geoArrowMetadata = R
"({"crs":)" + crsString + R"(})";
267 nanoarrow::UniqueBuffer metadataKv;
269 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView(
"ARROW:extension:name" ), ArrowCharView(
"geoarrow.wkb" ) ) );
270 QGIS_NANOARROW_THROW_NOT_OK( ArrowMetadataBuilderAppend( metadataKv.get(), ArrowCharView(
"ARROW:extension:metadata" ), ArrowCharView( geoArrowMetadata.c_str() ) ) );
274 void appendGeometry(
const QgsFeature &feature,
struct ArrowArray *col )
283 struct ArrowBufferView v;
284 v.data.data = wkb.data();
285 v.size_bytes =
static_cast<int64_t
>( wkb.size() );
289 void inferMetaType(
const QMetaType::Type metaType,
struct ArrowSchema *col,
const QString &fieldName )
293 case QMetaType::Bool:
296 case QMetaType::QChar:
297 case QMetaType::SChar:
300 case QMetaType::UChar:
303 case QMetaType::Short:
306 case QMetaType::UShort:
312 case QMetaType::UInt:
315 case QMetaType::Long:
316 case QMetaType::LongLong:
319 case QMetaType::ULong:
320 case QMetaType::ULongLong:
323 case QMetaType::Float:
326 case QMetaType::Double:
329 case QMetaType::QString:
332 case QMetaType::QByteArray:
335 case QMetaType::QDate:
338 case QMetaType::QTime:
341 case QMetaType::QDateTime:
344 case QMetaType::QStringList:
349 throw QgsException( u
"QgsArrowIterator can't infer field type '%1' for field '%2'"_s.arg( QMetaType::typeName( metaType ) ).arg( fieldName ) );
353 void inferField(
const QgsField &field,
struct ArrowSchema *col )
356 switch ( field.
type() )
358 case QMetaType::QVariantList:
360 inferMetaType( field.
subType(), col->children[0], field.
name() );
363 inferMetaType( field.
type(), col, field.
name() );
368 void appendVariant(
const QVariant &v,
struct ArrowArray *col,
struct ArrowSchemaView &columnTypeView,
struct ArrowSchemaView &columnListTypeView )
376 switch ( columnTypeView.type )
378 case NANOARROW_TYPE_BOOL:
379 if ( v.canConvert( QMetaType::Bool ) )
385 case NANOARROW_TYPE_UINT8:
386 case NANOARROW_TYPE_UINT16:
387 case NANOARROW_TYPE_UINT32:
388 case NANOARROW_TYPE_UINT64:
389 if ( v.canConvert( QMetaType::ULongLong ) )
395 case NANOARROW_TYPE_INT8:
396 case NANOARROW_TYPE_INT16:
397 case NANOARROW_TYPE_INT32:
398 case NANOARROW_TYPE_INT64:
399 if ( v.canConvert( QMetaType::LongLong ) )
405 case NANOARROW_TYPE_HALF_FLOAT:
406 case NANOARROW_TYPE_FLOAT:
407 case NANOARROW_TYPE_DOUBLE:
408 if ( v.canConvert( QMetaType::Double ) )
414 case NANOARROW_TYPE_STRING:
415 case NANOARROW_TYPE_LARGE_STRING:
416 case NANOARROW_TYPE_STRING_VIEW:
418 if ( v.canConvert( QMetaType::QString ) )
420 const QByteArray
string = v.toString().toUtf8();
421 struct ArrowBufferView bytesView;
422 bytesView.data.data =
string.constData();
423 bytesView.size_bytes =
static_cast<int64_t
>(
string.size() );
430 case NANOARROW_TYPE_BINARY:
431 case NANOARROW_TYPE_LARGE_BINARY:
432 case NANOARROW_TYPE_BINARY_VIEW:
433 case NANOARROW_TYPE_FIXED_SIZE_BINARY:
435 if ( v.canConvert( QMetaType::QByteArray ) )
437 const QByteArray bytes = v.toByteArray();
438 struct ArrowBufferView bytesView;
439 bytesView.data.data = bytes.data();
440 bytesView.size_bytes =
static_cast<int64_t
>( bytes.size() );
447 case NANOARROW_TYPE_DATE32:
449 if ( v.canConvert( QMetaType::QDate ) )
451 static QDate epoch = QDate( 1970, 1, 1 );
452 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
459 case NANOARROW_TYPE_DATE64:
461 if ( v.canConvert( QMetaType::QDate ) )
463 static QDate epoch = QDate( 1970, 1, 1 );
464 int64_t daysSinceEpoch = epoch.daysTo( v.toDate() );
465 int64_t msSinceEpoch = daysSinceEpoch * 24 * 60 * 60 * 1000;
472 case NANOARROW_TYPE_TIMESTAMP:
474 if ( v.canConvert( QMetaType::QDateTime ) )
476 const QDateTime dateTime = v.toDateTime().toUTC();
477 switch ( columnTypeView.time_unit )
479 case NANOARROW_TIME_UNIT_SECOND:
482 case NANOARROW_TIME_UNIT_MILLI:
485 case NANOARROW_TIME_UNIT_MICRO:
488 case NANOARROW_TIME_UNIT_NANO:
496 case NANOARROW_TYPE_TIME32:
497 case NANOARROW_TYPE_TIME64:
499 if ( v.canConvert( QMetaType::QTime ) )
501 const QTime time = v.toTime();
502 switch ( columnTypeView.time_unit )
504 case NANOARROW_TIME_UNIT_SECOND:
507 case NANOARROW_TIME_UNIT_MILLI:
510 case NANOARROW_TIME_UNIT_MICRO:
513 case NANOARROW_TIME_UNIT_NANO:
522 case NANOARROW_TYPE_LIST:
523 case NANOARROW_TYPE_FIXED_SIZE_LIST:
524 case NANOARROW_TYPE_LARGE_LIST:
525 case NANOARROW_TYPE_LIST_VIEW:
526 case NANOARROW_TYPE_LARGE_LIST_VIEW:
528 if ( v.canConvert( QMetaType::QVariantList ) )
530 const QVariantList variantList = v.toList();
531 struct ArrowSchemaView dummyListType {};
532 for (
const QVariant &item : variantList )
534 appendVariant( item, col->children[0], columnListTypeView, dummyListType );
546 throw QgsException( u
"Can't convert variant of type '%1' to Arrow type '%2'"_s.arg( v.typeName() ).arg( ArrowTypeString( columnTypeView.type ) ) );
549 class ArrowIteratorArrayStreamImpl
552 ArrowIteratorArrayStreamImpl( QgsArrowIterator iterator,
int batchSize )
553 : mIterator( iterator )
554 , mBatchSize( batchSize )
557 int GetSchema(
struct ArrowSchema *schema )
559 NANOARROW_RETURN_NOT_OK( ArrowSchemaDeepCopy( mIterator.schema(), schema ) );
563 int GetNext(
struct ArrowArray *array )
567 QgsArrowArray batch = mIterator.nextFeatures( mBatchSize );
568 ArrowArrayMove( batch.
array(), array );
571 catch ( QgsException &e )
573 mLastError = e.
what().toStdString();
576 catch ( std::exception &e )
578 mLastError = e.what();
583 mLastError =
"unknown error";
588 const char *GetLastError()
const {
return mLastError.c_str(); }
591 QgsArrowIterator mIterator;
592 int mBatchSize { 65536 };
593 std::string mLastError {};
599 : mFeatureIterator( featureIterator )
604 return mSchema.schema();
611 throw QgsException( u
"Invalid or null ArrowSchema provided"_s );
620 nanoarrow::ArrayStreamFactory<ArrowIteratorArrayStreamImpl>::InitArrayStream(
new ArrowIteratorArrayStreamImpl( *
this, batchSize ), out.
arrayStream() );
629 throw QgsException( u
"QgsArrowIterator can't iterate over less than one feature"_s );
632 if ( !mSchema.isValid() )
634 throw QgsException( u
"QgsArrowIterator schema not set"_s );
640 const struct ArrowSchema *
schema = mSchema.schema();
642 struct ArrowError error {};
645 struct ArrowSchemaView schemaView;
647 if ( schemaView.type != NANOARROW_TYPE_STRUCT )
649 throw QgsException( u
"QgsArrowIterator expected requested schema as struct but got '%1'"_s.arg( ArrowTypeString( schemaView.type ) ) );
652 std::vector<QString> columnNames(
schema->n_children );
653 std::vector<struct ArrowSchemaView> colTypeViews(
schema->n_children );
654 std::vector<struct ArrowSchemaView> colListTypeViews(
schema->n_children );
655 for ( int64_t i = 0; i <
schema->n_children; i++ )
658 columnNames[i] = QString(
schema->children[i]->name !=
nullptr ?
schema->children[i]->name : QString() );
662 switch ( colTypeViews[i].type )
664 case NANOARROW_TYPE_LIST:
665 case NANOARROW_TYPE_FIXED_SIZE_LIST:
666 case NANOARROW_TYPE_LARGE_LIST:
667 case NANOARROW_TYPE_LIST_VIEW:
668 case NANOARROW_TYPE_LARGE_LIST_VIEW:
670 struct ArrowSchemaView childView;
672 colListTypeViews[i] = std::move( childView );
676 colListTypeViews[i] = ArrowSchemaView {};
682 nanoarrow::UniqueArray tmp;
688 std::vector<int> featureAttributeIndex;
690 while ( n > 0 && mFeatureIterator.nextFeature( feature ) )
695 if ( featureAttributeIndex.empty() )
697 for ( int64_t i = 0; i <
schema->n_children; i++ )
699 featureAttributeIndex.push_back( feature.
fieldNameIndex( columnNames[i] ) );
705 for ( int64_t i = 0; i <
schema->n_children; i++ )
707 int attributeIndex = featureAttributeIndex[i];
708 struct ArrowArray *columnArray = tmp->children[i];
710 if ( i == mSchema.geometryColumnIndex() )
712 appendGeometry( feature, columnArray );
714 else if ( attributeIndex >= 0 && attributeIndex < feature.
attributeCount() )
716 appendVariant( feature.
attribute( attributeIndex ), columnArray, colTypeViews[i], colListTypeViews[i] );
730 if ( tmp->length > 0 )
732 ArrowArrayMove( tmp.get(), out.
array() );
739 bool layerHasGeometry = layer.
isSpatial();
756 QgisPrivateArrowSchemaInit( out.
schema() );
758 for (
int i = 0; i < fields.
count(); i++ )
760 inferField( fields.
field( i ), out.
schema()->children[i] );
766 if ( geometryColumnName.isEmpty() )
768 geometryColumnName = u
"geometry"_s;
771 inferGeometry( out.
schema()->children[fields.
count()], geometryColumnName, crs );
@ FlagExportTrianglesAsPolygons
Triangles should be exported as polygon geometries.
Wrapper around an ArrowArrayStream.
QgsArrowArrayStream & operator=(QgsArrowArrayStream &other)=delete
struct ArrowArrayStream * arrayStream()
Access the underlying ArrowArrayStream from C++.
unsigned long long cArrayStreamAddress() const
Returns the address of the underlying ArrowArrayStream for import or export across boundaries.
QgsArrowArrayStream()=default
Construct invalid array stream holder.
void exportToAddress(unsigned long long otherAddress)
Export this array stream to the address of an empty ArrowArrayStream for export across boundaries.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArrayStream.
Wrapper around an ArrowArray.
QgsArrowArray()=default
Construct invalid array holder.
struct ArrowArray * array()
Access the underlying ArrowArray from C++.
QgsArrowArray & operator=(QgsArrowArray &other)=delete
bool isValid() const
Returns true if this wrapper object holds a valid ArrowArray.
void exportToAddress(unsigned long long otherAddress)
Export this array to the address of an empty ArrowArray for export across boundaries.
unsigned long long cArrayAddress() const
Returns the address of the underlying ArrowArray for import or export across boundaries.
Options for inferring an ArrowSchema from a feature source.
void setGeometryColumnName(const QString &geometryColumnName)
Set the name that should be used to refer to the geometry column.
QgsArrowInferSchemaOptions()
Construct default options.
QString geometryColumnName() const
The name that should be used for a layer's geometry column.
static QgsArrowSchema inferSchema(const QgsVectorLayer &layer, const QgsArrowInferSchemaOptions &options=QgsArrowInferSchemaOptions())
Infer the QgsArrowSchema for a given QgsVectorLayer.
struct ArrowSchema * schema()
Access the output ArrowSchema from C++.
QgsArrowArrayStream toArrayStream(int batchSize=65536) const
Export this iterator as an ArrowArrayStream.
QgsArrowArray nextFeatures(int n)
Build an ArrowArray using the next n features (or fewer depending on the number of features remaining...
QgsArrowIterator()=default
Construct invalid iterator.
void setSchema(const QgsArrowSchema &schema)
Set the ArrowSchema for the output of all future batches.
Wrapper around an ArrowSchema.
int geometryColumnIndex() const
Returns the index of the column in this schema that should be populated with a feature geometry.
bool isValid() const
Returns true if this wrapper object holds a valid ArrowSchema.
unsigned long long cSchemaAddress() const
Returns the address of the underlying ArrowSchema for import or export across boundaries.
struct ArrowSchema * schema()
Access the underlying ArrowSchema from C++.
QgsArrowSchema & operator=(const QgsArrowSchema &other)
Assignment operator.
void exportToAddress(unsigned long long otherAddress)
Export this schema to the address of an empty ArrowSchema for export across boundaries.
QgsArrowSchema()
Construct invalid schema holder.
void setGeometryColumnIndex(int geometryColumnIndex)
Set the index of the column in this schema that should be populated with a feature geometry.
Represents a coordinate reference system (CRS).
std::string toJsonString(bool multiline=false, int indentationWidth=4, const QString &schema=QString()) const
Returns a JSON string representation of this CRS.
Defines a QGIS exception class.
Wrapper for iterator of features from vector data provider or vector layer.
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
int fieldNameIndex(const QString &fieldName) const
Utility method to get attribute index from name.
int attributeCount() const
Returns the number of attributes attached to the feature.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Q_INVOKABLE QVariant attribute(const QString &name) const
Lookup attribute value by attribute name.
Encapsulate a field in an attribute table or data source.
QMetaType::Type subType() const
If the field is a collection, gets its element's type.
Container of fields for a vector layer.
QgsField field(int fieldIdx) const
Returns the field at particular index (must be in range 0..N-1).
QByteArray asWkb(QgsAbstractGeometry::WkbFlags flags=QgsAbstractGeometry::WkbFlags()) const
Export the geometry to WKB.
QgsCoordinateReferenceSystem crs
static bool isNull(const QVariant &variant, bool silenceNullWarnings=false)
Returns true if the specified variant should be considered a NULL value.
virtual QString geometryColumnName() const
Returns the name of the column storing geometry, if applicable.
Represents a vector layer which manages a vector based dataset.
bool isSpatial() const final
Returns true if this is a geometry layer and false in case of NoGeometry (table only) or UnknownGeome...
QgsVectorDataProvider * dataProvider() final
Returns the layer's data provider, it may be nullptr.
#define QGIS_NANOARROW_THROW_NOT_OK_ERR(expr, err)
#define QGIS_NANOARROW_THROW_NOT_OK(expr)