QGIS API Documentation  3.8.0-Zanzibar (11aff65)
qgsgmlschema.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsgmlschema.cpp
3  --------------------------------------
4  Date : February 2013
5  Copyright : (C) 2013 by Radim Blazek
6  Email : radim.blazek@gmail.com
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 #include "qgsgmlschema.h"
16 #include "qgsrectangle.h"
18 #include "qgserror.h"
19 #include "qgsgeometry.h"
20 #include "qgslogger.h"
22 #include <QBuffer>
23 #include <QList>
24 #include <QNetworkRequest>
25 #include <QNetworkReply>
26 #include <QProgressDialog>
27 #include <QSet>
28 #include <QSettings>
29 #include <QUrl>
30 
31 #include <limits>
32 
33 const char NS_SEPARATOR = '?';
34 const QString GML_NAMESPACE = QStringLiteral( "http://www.opengis.net/gml" );
35 
36 
37 QgsGmlFeatureClass::QgsGmlFeatureClass( const QString &name, const QString &path )
38  : mName( name )
39  , mPath( path )
40 {
41 }
42 
43 int QgsGmlFeatureClass::fieldIndex( const QString &name )
44 {
45  for ( int i = 0; i < mFields.size(); i++ )
46  {
47  if ( mFields[i].name() == name ) return i;
48  }
49  return -1;
50 }
51 
52 // --------------------------- QgsGmlSchema -------------------------------
54  : mSkipLevel( std::numeric_limits<int>::max() )
55 {
56  mGeometryTypes << QStringLiteral( "Point" ) << QStringLiteral( "MultiPoint" )
57  << QStringLiteral( "LineString" ) << QStringLiteral( "MultiLineString" )
58  << QStringLiteral( "Polygon" ) << QStringLiteral( "MultiPolygon" );
59 }
60 
61 QString QgsGmlSchema::readAttribute( const QString &attributeName, const XML_Char **attr ) const
62 {
63  int i = 0;
64  while ( attr[i] )
65  {
66  if ( attributeName.compare( attr[i] ) == 0 )
67  {
68  return QString( attr[i + 1] );
69  }
70  i += 2;
71  }
72  return QString();
73 }
74 
75 bool QgsGmlSchema::parseXSD( const QByteArray &xml )
76 {
77  QDomDocument dom;
78  QString errorMsg;
79  int errorLine;
80  int errorColumn;
81  if ( !dom.setContent( xml, false, &errorMsg, &errorLine, &errorColumn ) )
82  {
83  // TODO: error
84  return false;
85  }
86 
87  QDomElement docElem = dom.documentElement();
88 
89  QList<QDomElement> elementElements = domElements( docElem, QStringLiteral( "element" ) );
90 
91  //QgsDebugMsg( QStringLiteral( "%1 elemets read" ).arg( elementElements.size() ) );
92 
93  const auto constElementElements = elementElements;
94  for ( const QDomElement &elementElement : constElementElements )
95  {
96  QString name = elementElement.attribute( QStringLiteral( "name" ) );
97  QString type = elementElement.attribute( QStringLiteral( "type" ) );
98 
99  QString gmlBaseType = xsdComplexTypeGmlBaseType( docElem, stripNS( type ) );
100  //QgsDebugMsg( QStringLiteral( "gmlBaseType = %1" ).arg( gmlBaseType ) );
101  //QgsDebugMsg( QStringLiteral( "name = %1 gmlBaseType = %2" ).arg( name ).arg( gmlBaseType ) );
102  // We should only use gml:AbstractFeatureType descendants which have
103  // ancestor listed in gml:FeatureAssociationType (featureMember) descendant
104  // But we could only loose some data if XSD was not correct, I think.
105 
106  if ( gmlBaseType == QLatin1String( "AbstractFeatureType" ) )
107  {
108  // Get feature type definition
109  QgsGmlFeatureClass featureClass( name, QString() );
110  xsdFeatureClass( docElem, stripNS( type ), featureClass );
111  mFeatureClassMap.insert( name, featureClass );
112  }
113  // A feature may have more geometries, we take just the first one
114  }
115 
116  return true;
117 }
118 
119 bool QgsGmlSchema::xsdFeatureClass( const QDomElement &element, const QString &typeName, QgsGmlFeatureClass &featureClass )
120 {
121  //QgsDebugMsg("typeName = " + typeName );
122  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), typeName );
123  if ( complexTypeElement.isNull() ) return false;
124 
125  // extension or restriction
126  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
127  if ( extrest.isNull() )
128  {
129  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
130  }
131  if ( extrest.isNull() ) return false;
132 
133  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
134  if ( extrestName == QLatin1String( "gml:AbstractFeatureType" ) )
135  {
136  // In theory we should add gml:AbstractFeatureType default attributes gml:description
137  // and gml:name but it does not seem to be a common practice and we would probably
138  // confuse most users
139  }
140  else
141  {
142  // Get attributes from extrest
143  if ( !xsdFeatureClass( element, stripNS( extrestName ), featureClass ) ) return false;
144  }
145 
146  // Supported geometry types
147  QStringList geometryPropertyTypes;
148  const auto constMGeometryTypes = mGeometryTypes;
149  for ( const QString &geom : constMGeometryTypes )
150  {
151  geometryPropertyTypes << geom + "PropertyType";
152  }
153 
154  QStringList geometryAliases;
155  geometryAliases << QStringLiteral( "location" ) << QStringLiteral( "centerOf" ) << QStringLiteral( "position" ) << QStringLiteral( "extentOf" )
156  << QStringLiteral( "coverage" ) << QStringLiteral( "edgeOf" ) << QStringLiteral( "centerLineOf" ) << QStringLiteral( "multiLocation" )
157  << QStringLiteral( "multiCenterOf" ) << QStringLiteral( "multiPosition" ) << QStringLiteral( "multiCenterLineOf" )
158  << QStringLiteral( "multiEdgeOf" ) << QStringLiteral( "multiCoverage" ) << QStringLiteral( "multiExtentOf" );
159 
160  // Add attributes from current comple type
161  QList<QDomElement> sequenceElements = domElements( extrest, QStringLiteral( "sequence.element" ) );
162  const auto constSequenceElements = sequenceElements;
163  for ( const QDomElement &sequenceElement : constSequenceElements )
164  {
165  QString fieldName = sequenceElement.attribute( QStringLiteral( "name" ) );
166  QString fieldTypeName = stripNS( sequenceElement.attribute( QStringLiteral( "type" ) ) );
167  QString ref = sequenceElement.attribute( QStringLiteral( "ref" ) );
168  //QgsDebugMsg ( QString("fieldName = %1 fieldTypeName = %2 ref = %3").arg(fieldName).arg(fieldTypeName).arg(ref) );
169 
170  if ( !ref.isEmpty() )
171  {
172  if ( ref.startsWith( QLatin1String( "gml:" ) ) )
173  {
174  if ( geometryAliases.contains( stripNS( ref ) ) )
175  {
176  featureClass.geometryAttributes().append( stripNS( ref ) );
177  }
178  else
179  {
180  QgsDebugMsg( QStringLiteral( "Unknown referenced GML element: %1" ).arg( ref ) );
181  }
182  }
183  else
184  {
185  // TODO: get type from referenced element
186  QgsDebugMsg( QStringLiteral( "field %1.%2 is referencing %3 - not supported" ).arg( typeName, fieldName ) );
187  }
188  continue;
189  }
190 
191  if ( fieldName.isEmpty() )
192  {
193  QgsDebugMsg( QStringLiteral( "field in %1 without name" ).arg( typeName ) );
194  continue;
195  }
196 
197  // type is either type attribute
198  if ( fieldTypeName.isEmpty() )
199  {
200  // or type is inheriting from xs:simpleType
201  QDomElement sequenceElementRestriction = domElement( sequenceElement, QStringLiteral( "simpleType.restriction" ) );
202  fieldTypeName = stripNS( sequenceElementRestriction.attribute( QStringLiteral( "base" ) ) );
203  }
204 
205  QVariant::Type fieldType = QVariant::String;
206  if ( fieldTypeName.isEmpty() )
207  {
208  QgsDebugMsg( QStringLiteral( "Cannot get %1.%2 field type" ).arg( typeName, fieldName ) );
209  }
210  else
211  {
212  if ( geometryPropertyTypes.contains( fieldTypeName ) )
213  {
214  // Geometry attribute
215  featureClass.geometryAttributes().append( fieldName );
216  continue;
217  }
218 
219  if ( fieldTypeName == QLatin1String( "decimal" ) )
220  {
221  fieldType = QVariant::Double;
222  }
223  else if ( fieldTypeName == QLatin1String( "integer" ) )
224  {
225  fieldType = QVariant::Int;
226  }
227  }
228 
229  QgsField field( fieldName, fieldType, fieldTypeName );
230  featureClass.fields().append( field );
231  }
232 
233  return true;
234 }
235 
236 QString QgsGmlSchema::xsdComplexTypeGmlBaseType( const QDomElement &element, const QString &name )
237 {
238  //QgsDebugMsg("name = " + name );
239  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), name );
240  if ( complexTypeElement.isNull() ) return QString();
241 
242  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
243  if ( extrest.isNull() )
244  {
245  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
246  }
247  if ( extrest.isNull() ) return QString();
248 
249  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
250  if ( extrestName.startsWith( QLatin1String( "gml:" ) ) )
251  {
252  // GML base type found
253  return stripNS( extrestName );
254  }
255  // Continue recursively until GML base type is reached
256  return xsdComplexTypeGmlBaseType( element, stripNS( extrestName ) );
257 }
258 
259 QString QgsGmlSchema::stripNS( const QString &name )
260 {
261  return name.contains( ':' ) ? name.section( ':', 1 ) : name;
262 }
263 
264 QList<QDomElement> QgsGmlSchema::domElements( const QDomElement &element, const QString &path )
265 {
266  QList<QDomElement> list;
267 
268  QStringList names = path.split( '.' );
269  if ( names.isEmpty() ) return list;
270  QString name = names.value( 0 );
271  names.removeFirst();
272 
273  QDomNode n1 = element.firstChild();
274  while ( !n1.isNull() )
275  {
276  QDomElement el = n1.toElement();
277  if ( !el.isNull() )
278  {
279  QString tagName = stripNS( el.tagName() );
280  if ( tagName == name )
281  {
282  if ( names.isEmpty() )
283  {
284  list.append( el );
285  }
286  else
287  {
288  list.append( domElements( el, names.join( QStringLiteral( "." ) ) ) );
289  }
290  }
291  }
292  n1 = n1.nextSibling();
293  }
294 
295  return list;
296 }
297 
298 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path )
299 {
300  return domElements( element, path ).value( 0 );
301 }
302 
303 QList<QDomElement> QgsGmlSchema::domElements( QList<QDomElement> &elements, const QString &attr, const QString &attrVal )
304 {
305  QList<QDomElement> list;
306  const auto constElements = elements;
307  for ( const QDomElement &el : constElements )
308  {
309  if ( el.attribute( attr ) == attrVal )
310  {
311  list << el;
312  }
313  }
314  return list;
315 }
316 
317 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path, const QString &attr, const QString &attrVal )
318 {
319  QList<QDomElement> list = domElements( element, path );
320  return domElements( list, attr, attrVal ).value( 0 );
321 }
322 
323 bool QgsGmlSchema::guessSchema( const QByteArray &data )
324 {
325  mLevel = 0;
326  mSkipLevel = std::numeric_limits<int>::max();
327  XML_Parser p = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
328  XML_SetUserData( p, this );
329  XML_SetElementHandler( p, QgsGmlSchema::start, QgsGmlSchema::end );
330  XML_SetCharacterDataHandler( p, QgsGmlSchema::chars );
331  int atEnd = 1;
332  int res = XML_Parse( p, data.constData(), data.size(), atEnd );
333 
334  if ( res == 0 )
335  {
336  QString err = QString( XML_ErrorString( XML_GetErrorCode( p ) ) );
337  QgsDebugMsg( QStringLiteral( "XML_Parse returned %1 error %2" ).arg( res ).arg( err ) );
338  mError = QgsError( err, QStringLiteral( "GML schema" ) );
339  mError.append( tr( "Cannot guess schema" ) );
340  }
341 
342  return res != 0;
343 }
344 
345 void QgsGmlSchema::startElement( const XML_Char *el, const XML_Char **attr )
346 {
347  Q_UNUSED( attr )
348  mLevel++;
349 
350  QString elementName = QString::fromUtf8( el );
351  QgsDebugMsgLevel( QStringLiteral( "-> %1 %2 %3" ).arg( mLevel ).arg( elementName, mLevel >= mSkipLevel ? "skip" : "" ), 5 );
352 
353  if ( mLevel >= mSkipLevel )
354  {
355  //QgsDebugMsg( QStringLiteral("skip level %1").arg( mLevel ) );
356  return;
357  }
358 
359  mParsePathStack.append( elementName );
360  QString path = mParsePathStack.join( QStringLiteral( "." ) );
361 
362  QStringList splitName = elementName.split( NS_SEPARATOR );
363  QString localName = splitName.last();
364  QString ns = splitName.size() > 1 ? splitName.first() : QString();
365  //QgsDebugMsg( "ns = " + ns + " localName = " + localName );
366 
367  ParseMode parseMode = modeStackTop();
368  //QgsDebugMsg ( QString("localName = %1 parseMode = %2").arg(localName).arg(parseMode) );
369 
370  if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
371  {
372  // gml:boundedBy in feature or feature collection -> skip
373  mSkipLevel = mLevel + 1;
374  }
375  else if ( localName.compare( QLatin1String( "featureMembers" ), Qt::CaseInsensitive ) == 0 )
376  {
377  mParseModeStack.push( QgsGmlSchema::FeatureMembers );
378  }
379  // GML does not specify that gml:FeatureAssociationType elements should end
380  // with 'Member' apart standard gml:featureMember, but it is quite usual to
381  // that the names ends with 'Member', e.g.: osgb:topographicMember, cityMember,...
382  // so this is really fail if the name does not contain 'Member'
383 
384  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
385  {
386  mParseModeStack.push( QgsGmlSchema::FeatureMember );
387  }
388  // UMN Mapserver simple GetFeatureInfo response layer element (ends with _layer)
389  else if ( elementName.endsWith( QLatin1String( "_layer" ) ) )
390  {
391  // do nothing, we catch _feature children
392  }
393  // UMN Mapserver simple GetFeatureInfo response feature element (ends with _feature)
394  // or featureMember children.
395  // QGIS mapserver 2.2 GetFeatureInfo is using <Feature id="###"> for feature member,
396  // without any feature class distinction.
397  else if ( elementName.endsWith( QLatin1String( "_feature" ) )
398  || parseMode == QgsGmlSchema::FeatureMember
399  || parseMode == QgsGmlSchema::FeatureMembers
400  || localName.compare( QLatin1String( "feature" ), Qt::CaseInsensitive ) == 0 )
401  {
402  QgsDebugMsg( "is feature path = " + path );
403  if ( mFeatureClassMap.count( localName ) == 0 )
404  {
405  mFeatureClassMap.insert( localName, QgsGmlFeatureClass( localName, path ) );
406  }
407  mCurrentFeatureName = localName;
408  mParseModeStack.push( QgsGmlSchema::Feature );
409  }
410  else if ( parseMode == QgsGmlSchema::Attribute && ns == GML_NAMESPACE && mGeometryTypes.indexOf( localName ) >= 0 )
411  {
412  // Geometry (Point,MultiPoint,...) in geometry attribute
413  QStringList &geometryAttributes = mFeatureClassMap[mCurrentFeatureName].geometryAttributes();
414  if ( geometryAttributes.count( mAttributeName ) == 0 )
415  {
416  geometryAttributes.append( mAttributeName );
417  }
418  mSkipLevel = mLevel + 1; // no need to parse children
419  }
420  else if ( parseMode == QgsGmlSchema::Feature )
421  {
422  // An element in feature should be ordinary or geometry attribute
423  //QgsDebugMsg( "is attribute");
424 
425  // Usually localName is attribute name, e.g.
426  // <gml:desc>My description</gml:desc>
427  // but QGIS server (2.2) is using:
428  // <Attribute value="My description" name="desc"/>
429  QString name = readAttribute( QStringLiteral( "name" ), attr );
430  //QgsDebugMsg ( "attribute name = " + name );
431  if ( localName.compare( QLatin1String( "attribute" ), Qt::CaseInsensitive ) == 0
432  && !name.isEmpty() )
433  {
434  QString value = readAttribute( QStringLiteral( "value" ), attr );
435  //QgsDebugMsg ( "attribute value = " + value );
436  addAttribute( name, value );
437  }
438  else
439  {
440  mAttributeName = localName;
441  mParseModeStack.push( QgsGmlSchema::Attribute );
442  mStringCash.clear();
443  }
444  }
445 }
446 
447 void QgsGmlSchema::endElement( const XML_Char *el )
448 {
449  QString elementName = QString::fromUtf8( el );
450  QgsDebugMsgLevel( QStringLiteral( "<- %1 %2" ).arg( mLevel ).arg( elementName ), 5 );
451 
452  if ( mLevel >= mSkipLevel )
453  {
454  //QgsDebugMsg( QStringLiteral("skip level %1").arg( mLevel ) );
455  mLevel--;
456  return;
457  }
458  else
459  {
460  // clear possible skip level
461  mSkipLevel = std::numeric_limits<int>::max();
462  }
463 
464  QStringList splitName = elementName.split( NS_SEPARATOR );
465  QString localName = splitName.last();
466  QString ns = splitName.size() > 1 ? splitName.first() : QString();
467 
468  QgsGmlSchema::ParseMode parseMode = modeStackTop();
469 
470  if ( parseMode == QgsGmlSchema::FeatureMembers )
471  {
472  modeStackPop();
473  }
474  else if ( parseMode == QgsGmlSchema::Attribute && localName == mAttributeName )
475  {
476  // End of attribute
477  //QgsDebugMsg("end attribute");
478  modeStackPop(); // go up to feature
479 
480  if ( mFeatureClassMap[mCurrentFeatureName].geometryAttributes().count( mAttributeName ) == 0 )
481  {
482  addAttribute( mAttributeName, mStringCash );
483  }
484  }
485  else if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
486  {
487  // was skipped
488  }
489  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
490  {
491  modeStackPop();
492  }
493  mParsePathStack.removeLast();
494  mLevel--;
495 }
496 
497 void QgsGmlSchema::characters( const XML_Char *chars, int len )
498 {
499  //QgsDebugMsg( QStringLiteral("level %1 : %2").arg( mLevel ).arg( QString::fromUtf8( chars, len ) ) );
500  if ( mLevel >= mSkipLevel )
501  {
502  //QgsDebugMsg( QStringLiteral("skip level %1").arg( mLevel ) );
503  return;
504  }
505 
506  //save chars in mStringCash attribute mode for value type analysis
507  if ( modeStackTop() == QgsGmlSchema::Attribute )
508  {
509  mStringCash.append( QString::fromUtf8( chars, len ) );
510  }
511 }
512 
513 void QgsGmlSchema::addAttribute( const QString &name, const QString &value )
514 {
515  // It is not geometry attribute -> analyze value
516  bool ok;
517  value.toInt( &ok );
518  QVariant::Type type = QVariant::String;
519  if ( ok )
520  {
521  type = QVariant::Int;
522  }
523  else
524  {
525  value.toDouble( &ok );
526  if ( ok )
527  {
528  type = QVariant::Double;
529  }
530  }
531  //QgsDebugMsg( "mStringCash = " + mStringCash + " type = " + QVariant::typeToName( type ) );
532  //QMap<QString, QgsField> & fields = mFeatureClassMap[mCurrentFeatureName].fields();
533  QList<QgsField> &fields = mFeatureClassMap[mCurrentFeatureName].fields();
534  int fieldIndex = mFeatureClassMap[mCurrentFeatureName].fieldIndex( name );
535  if ( fieldIndex == -1 )
536  {
537  QgsField field( name, type );
538  fields.append( field );
539  }
540  else
541  {
542  QgsField &field = fields[fieldIndex];
543  // check if type is sufficient
544  if ( ( field.type() == QVariant::Int && ( type == QVariant::String || type == QVariant::Double ) ) ||
545  ( field.type() == QVariant::Double && type == QVariant::String ) )
546  {
547  field.setType( type );
548  }
549  }
550 }
551 
552 QStringList QgsGmlSchema::typeNames() const
553 {
554  return mFeatureClassMap.keys();
555 }
556 
557 QList<QgsField> QgsGmlSchema::fields( const QString &typeName )
558 {
559  if ( mFeatureClassMap.count( typeName ) == 0 ) return QList<QgsField>();
560  return mFeatureClassMap[typeName].fields();
561 }
562 
563 QStringList QgsGmlSchema::geometryAttributes( const QString &typeName )
564 {
565  if ( mFeatureClassMap.count( typeName ) == 0 ) return QStringList();
566  return mFeatureClassMap[typeName].geometryAttributes();
567 }
bool guessSchema(const QByteArray &data)
Guess GML schema from data if XSD does not exist.
#define QgsDebugMsg(str)
Definition: qgslogger.h:38
const QString GML_NAMESPACE
QList< QgsField > & fields()
Definition: qgsgmlschema.h:50
#define QgsDebugMsgLevel(str, level)
Definition: qgslogger.h:39
const QString & typeName
QList< QgsField > fields(const QString &typeName)
Gets fields for type/class name parsed from GML or XSD.
void append(const QString &message, const QString &tag)
Append new error message.
Definition: qgserror.cpp:39
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:48
Description of feature class in GML.
Definition: qgsgmlschema.h:40
void setType(QVariant::Type type)
Set variant type.
Definition: qgsfield.cpp:146
QgsError is container for error messages (report).
Definition: qgserror.h:80
const char NS_SEPARATOR
QStringList & geometryAttributes()
Definition: qgsgmlschema.h:56
QgsGmlFeatureClass()=default
Constructor for QgsGmlFeatureClass.
QVariant::Type type
Definition: qgsfield.h:56
QStringList typeNames() const
Gets list of dot separated paths to feature classes parsed from GML or XSD.
int fieldIndex(const QString &name)
bool parseXSD(const QByteArray &xml)
Gets fields info from XSD.
QStringList geometryAttributes(const QString &typeName)
Gets list of geometry attributes for type/class name.