QGIS API Documentation  3.2.0-Bonn (bc43194)
qgsgmlschema.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsgmlschema.cpp
3  --------------------------------------
4  Date : February 2013
5  Copyright : (C) 2013 by Radim Blazek
6  Email : [email protected]
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 #include "qgsgmlschema.h"
16 #include "qgsrectangle.h"
18 #include "qgserror.h"
19 #include "qgsgeometry.h"
20 #include "qgslogger.h"
22 #include <QBuffer>
23 #include <QList>
24 #include <QNetworkRequest>
25 #include <QNetworkReply>
26 #include <QProgressDialog>
27 #include <QSet>
28 #include <QSettings>
29 #include <QUrl>
30 
31 #include <limits>
32 
33 const char NS_SEPARATOR = '?';
34 const QString GML_NAMESPACE = QStringLiteral( "http://www.opengis.net/gml" );
35 
36 
37 QgsGmlFeatureClass::QgsGmlFeatureClass( const QString &name, const QString &path )
38  : mName( name )
39  , mPath( path )
40 {
41 }
42 
43 int QgsGmlFeatureClass::fieldIndex( const QString &name )
44 {
45  for ( int i = 0; i < mFields.size(); i++ )
46  {
47  if ( mFields[i].name() == name ) return i;
48  }
49  return -1;
50 }
51 
52 // --------------------------- QgsGmlSchema -------------------------------
54  : mSkipLevel( std::numeric_limits<int>::max() )
55 {
56  mGeometryTypes << QStringLiteral( "Point" ) << QStringLiteral( "MultiPoint" )
57  << QStringLiteral( "LineString" ) << QStringLiteral( "MultiLineString" )
58  << QStringLiteral( "Polygon" ) << QStringLiteral( "MultiPolygon" );
59 }
60 
61 QString QgsGmlSchema::readAttribute( const QString &attributeName, const XML_Char **attr ) const
62 {
63  int i = 0;
64  while ( attr[i] )
65  {
66  if ( attributeName.compare( attr[i] ) == 0 )
67  {
68  return QString( attr[i + 1] );
69  }
70  i += 2;
71  }
72  return QString();
73 }
74 
75 bool QgsGmlSchema::parseXSD( const QByteArray &xml )
76 {
77  QDomDocument dom;
78  QString errorMsg;
79  int errorLine;
80  int errorColumn;
81  if ( !dom.setContent( xml, false, &errorMsg, &errorLine, &errorColumn ) )
82  {
83  // TODO: error
84  return false;
85  }
86 
87  QDomElement docElem = dom.documentElement();
88 
89  QList<QDomElement> elementElements = domElements( docElem, QStringLiteral( "element" ) );
90 
91  //QgsDebugMsg( QString( "%1 elemets read" ).arg( elementElements.size() ) );
92 
93  Q_FOREACH ( const QDomElement &elementElement, elementElements )
94  {
95  QString name = elementElement.attribute( QStringLiteral( "name" ) );
96  QString type = elementElement.attribute( QStringLiteral( "type" ) );
97 
98  QString gmlBaseType = xsdComplexTypeGmlBaseType( docElem, stripNS( type ) );
99  //QgsDebugMsg( QString( "gmlBaseType = %1" ).arg( gmlBaseType ) );
100  //QgsDebugMsg( QString( "name = %1 gmlBaseType = %2" ).arg( name ).arg( gmlBaseType ) );
101  // We should only use gml:AbstractFeatureType descendants which have
102  // ancestor listed in gml:FeatureAssociationType (featureMember) descendant
103  // But we could only loose some data if XSD was not correct, I think.
104 
105  if ( gmlBaseType == QLatin1String( "AbstractFeatureType" ) )
106  {
107  // Get feature type definition
108  QgsGmlFeatureClass featureClass( name, QLatin1String( "" ) );
109  xsdFeatureClass( docElem, stripNS( type ), featureClass );
110  mFeatureClassMap.insert( name, featureClass );
111  }
112  // A feature may have more geometries, we take just the first one
113  }
114 
115  return true;
116 }
117 
118 bool QgsGmlSchema::xsdFeatureClass( const QDomElement &element, const QString &typeName, QgsGmlFeatureClass &featureClass )
119 {
120  //QgsDebugMsg("typeName = " + typeName );
121  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), typeName );
122  if ( complexTypeElement.isNull() ) return false;
123 
124  // extension or restriction
125  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
126  if ( extrest.isNull() )
127  {
128  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
129  }
130  if ( extrest.isNull() ) return false;
131 
132  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
133  if ( extrestName == QLatin1String( "gml:AbstractFeatureType" ) )
134  {
135  // In theory we should add gml:AbstractFeatureType default attributes gml:description
136  // and gml:name but it does not seem to be a common practice and we would probably
137  // confuse most users
138  }
139  else
140  {
141  // Get attributes from extrest
142  if ( !xsdFeatureClass( element, stripNS( extrestName ), featureClass ) ) return false;
143  }
144 
145  // Supported geometry types
146  QStringList geometryPropertyTypes;
147  Q_FOREACH ( const QString &geom, mGeometryTypes )
148  {
149  geometryPropertyTypes << geom + "PropertyType";
150  }
151 
152  QStringList geometryAliases;
153  geometryAliases << QStringLiteral( "location" ) << QStringLiteral( "centerOf" ) << QStringLiteral( "position" ) << QStringLiteral( "extentOf" )
154  << QStringLiteral( "coverage" ) << QStringLiteral( "edgeOf" ) << QStringLiteral( "centerLineOf" ) << QStringLiteral( "multiLocation" )
155  << QStringLiteral( "multiCenterOf" ) << QStringLiteral( "multiPosition" ) << QStringLiteral( "multiCenterLineOf" )
156  << QStringLiteral( "multiEdgeOf" ) << QStringLiteral( "multiCoverage" ) << QStringLiteral( "multiExtentOf" );
157 
158  // Add attributes from current comple type
159  QList<QDomElement> sequenceElements = domElements( extrest, QStringLiteral( "sequence.element" ) );
160  Q_FOREACH ( const QDomElement &sequenceElement, sequenceElements )
161  {
162  QString fieldName = sequenceElement.attribute( QStringLiteral( "name" ) );
163  QString fieldTypeName = stripNS( sequenceElement.attribute( QStringLiteral( "type" ) ) );
164  QString ref = sequenceElement.attribute( QStringLiteral( "ref" ) );
165  //QgsDebugMsg ( QString("fieldName = %1 fieldTypeName = %2 ref = %3").arg(fieldName).arg(fieldTypeName).arg(ref) );
166 
167  if ( !ref.isEmpty() )
168  {
169  if ( ref.startsWith( QLatin1String( "gml:" ) ) )
170  {
171  if ( geometryAliases.contains( stripNS( ref ) ) )
172  {
173  featureClass.geometryAttributes().append( stripNS( ref ) );
174  }
175  else
176  {
177  QgsDebugMsg( QString( "Unknown referenced GML element: %1" ).arg( ref ) );
178  }
179  }
180  else
181  {
182  // TODO: get type from referenced element
183  QgsDebugMsg( QString( "field %1.%2 is referencing %3 - not supported" ).arg( typeName, fieldName ) );
184  }
185  continue;
186  }
187 
188  if ( fieldName.isEmpty() )
189  {
190  QgsDebugMsg( QString( "field in %1 without name" ).arg( typeName ) );
191  continue;
192  }
193 
194  // type is either type attribute
195  if ( fieldTypeName.isEmpty() )
196  {
197  // or type is inheriting from xs:simpleType
198  QDomElement sequenceElementRestriction = domElement( sequenceElement, QStringLiteral( "simpleType.restriction" ) );
199  fieldTypeName = stripNS( sequenceElementRestriction.attribute( QStringLiteral( "base" ) ) );
200  }
201 
202  QVariant::Type fieldType = QVariant::String;
203  if ( fieldTypeName.isEmpty() )
204  {
205  QgsDebugMsg( QString( "Cannot get %1.%2 field type" ).arg( typeName, fieldName ) );
206  }
207  else
208  {
209  if ( geometryPropertyTypes.contains( fieldTypeName ) )
210  {
211  // Geometry attribute
212  featureClass.geometryAttributes().append( fieldName );
213  continue;
214  }
215 
216  if ( fieldTypeName == QLatin1String( "decimal" ) )
217  {
218  fieldType = QVariant::Double;
219  }
220  else if ( fieldTypeName == QLatin1String( "integer" ) )
221  {
222  fieldType = QVariant::Int;
223  }
224  }
225 
226  QgsField field( fieldName, fieldType, fieldTypeName );
227  featureClass.fields().append( field );
228  }
229 
230  return true;
231 }
232 
233 QString QgsGmlSchema::xsdComplexTypeGmlBaseType( const QDomElement &element, const QString &name )
234 {
235  //QgsDebugMsg("name = " + name );
236  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), name );
237  if ( complexTypeElement.isNull() ) return QLatin1String( "" );
238 
239  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
240  if ( extrest.isNull() )
241  {
242  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
243  }
244  if ( extrest.isNull() ) return QLatin1String( "" );
245 
246  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
247  if ( extrestName.startsWith( QLatin1String( "gml:" ) ) )
248  {
249  // GML base type found
250  return stripNS( extrestName );
251  }
252  // Continue recursively until GML base type is reached
253  return xsdComplexTypeGmlBaseType( element, stripNS( extrestName ) );
254 }
255 
256 QString QgsGmlSchema::stripNS( const QString &name )
257 {
258  return name.contains( ':' ) ? name.section( ':', 1 ) : name;
259 }
260 
261 QList<QDomElement> QgsGmlSchema::domElements( const QDomElement &element, const QString &path )
262 {
263  QList<QDomElement> list;
264 
265  QStringList names = path.split( '.' );
266  if ( names.isEmpty() ) return list;
267  QString name = names.value( 0 );
268  names.removeFirst();
269 
270  QDomNode n1 = element.firstChild();
271  while ( !n1.isNull() )
272  {
273  QDomElement el = n1.toElement();
274  if ( !el.isNull() )
275  {
276  QString tagName = stripNS( el.tagName() );
277  if ( tagName == name )
278  {
279  if ( names.isEmpty() )
280  {
281  list.append( el );
282  }
283  else
284  {
285  list.append( domElements( el, names.join( QStringLiteral( "." ) ) ) );
286  }
287  }
288  }
289  n1 = n1.nextSibling();
290  }
291 
292  return list;
293 }
294 
295 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path )
296 {
297  return domElements( element, path ).value( 0 );
298 }
299 
300 QList<QDomElement> QgsGmlSchema::domElements( QList<QDomElement> &elements, const QString &attr, const QString &attrVal )
301 {
302  QList<QDomElement> list;
303  Q_FOREACH ( const QDomElement &el, elements )
304  {
305  if ( el.attribute( attr ) == attrVal )
306  {
307  list << el;
308  }
309  }
310  return list;
311 }
312 
313 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path, const QString &attr, const QString &attrVal )
314 {
315  QList<QDomElement> list = domElements( element, path );
316  return domElements( list, attr, attrVal ).value( 0 );
317 }
318 
319 bool QgsGmlSchema::guessSchema( const QByteArray &data )
320 {
321  mLevel = 0;
322  mSkipLevel = std::numeric_limits<int>::max();
323  XML_Parser p = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
324  XML_SetUserData( p, this );
325  XML_SetElementHandler( p, QgsGmlSchema::start, QgsGmlSchema::end );
326  XML_SetCharacterDataHandler( p, QgsGmlSchema::chars );
327  int atEnd = 1;
328  int res = XML_Parse( p, data.constData(), data.size(), atEnd );
329 
330  if ( res == 0 )
331  {
332  QString err = QString( XML_ErrorString( XML_GetErrorCode( p ) ) );
333  QgsDebugMsg( QString( "XML_Parse returned %1 error %2" ).arg( res ).arg( err ) );
334  mError = QgsError( err, QStringLiteral( "GML schema" ) );
335  mError.append( tr( "Cannot guess schema" ) );
336  }
337 
338  return res != 0;
339 }
340 
341 void QgsGmlSchema::startElement( const XML_Char *el, const XML_Char **attr )
342 {
343  Q_UNUSED( attr );
344  mLevel++;
345 
346  QString elementName = QString::fromUtf8( el );
347  QgsDebugMsgLevel( QString( "-> %1 %2 %3" ).arg( mLevel ).arg( elementName, mLevel >= mSkipLevel ? "skip" : "" ), 5 );
348 
349  if ( mLevel >= mSkipLevel )
350  {
351  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
352  return;
353  }
354 
355  mParsePathStack.append( elementName );
356  QString path = mParsePathStack.join( QStringLiteral( "." ) );
357 
358  QStringList splitName = elementName.split( NS_SEPARATOR );
359  QString localName = splitName.last();
360  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
361  //QgsDebugMsg( "ns = " + ns + " localName = " + localName );
362 
363  ParseMode parseMode = modeStackTop();
364  //QgsDebugMsg ( QString("localName = %1 parseMode = %2").arg(localName).arg(parseMode) );
365 
366  if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
367  {
368  // gml:boundedBy in feature or feature collection -> skip
369  mSkipLevel = mLevel + 1;
370  }
371  else if ( localName.compare( QLatin1String( "featureMembers" ), Qt::CaseInsensitive ) == 0 )
372  {
373  mParseModeStack.push( QgsGmlSchema::FeatureMembers );
374  }
375  // GML does not specify that gml:FeatureAssociationType elements should end
376  // with 'Member' apart standard gml:featureMember, but it is quite usual to
377  // that the names ends with 'Member', e.g.: osgb:topographicMember, cityMember,...
378  // so this is really fail if the name does not contain 'Member'
379 
380  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
381  {
382  mParseModeStack.push( QgsGmlSchema::FeatureMember );
383  }
384  // UMN Mapserver simple GetFeatureInfo response layer element (ends with _layer)
385  else if ( elementName.endsWith( QLatin1String( "_layer" ) ) )
386  {
387  // do nothing, we catch _feature children
388  }
389  // UMN Mapserver simple GetFeatureInfo response feature element (ends with _feature)
390  // or featureMember children.
391  // QGIS mapserver 2.2 GetFeatureInfo is using <Feature id="###"> for feature member,
392  // without any feature class distinction.
393  else if ( elementName.endsWith( QLatin1String( "_feature" ) )
394  || parseMode == QgsGmlSchema::FeatureMember
395  || parseMode == QgsGmlSchema::FeatureMembers
396  || localName.compare( QLatin1String( "feature" ), Qt::CaseInsensitive ) == 0 )
397  {
398  QgsDebugMsg( "is feature path = " + path );
399  if ( mFeatureClassMap.count( localName ) == 0 )
400  {
401  mFeatureClassMap.insert( localName, QgsGmlFeatureClass( localName, path ) );
402  }
403  mCurrentFeatureName = localName;
404  mParseModeStack.push( QgsGmlSchema::Feature );
405  }
406  else if ( parseMode == QgsGmlSchema::Attribute && ns == GML_NAMESPACE && mGeometryTypes.indexOf( localName ) >= 0 )
407  {
408  // Geometry (Point,MultiPoint,...) in geometry attribute
409  QStringList &geometryAttributes = mFeatureClassMap[mCurrentFeatureName].geometryAttributes();
410  if ( geometryAttributes.count( mAttributeName ) == 0 )
411  {
412  geometryAttributes.append( mAttributeName );
413  }
414  mSkipLevel = mLevel + 1; // no need to parse children
415  }
416  else if ( parseMode == QgsGmlSchema::Feature )
417  {
418  // An element in feature should be ordinary or geometry attribute
419  //QgsDebugMsg( "is attribute");
420 
421  // Usually localName is attribute name, e.g.
422  // <gml:desc>My description</gml:desc>
423  // but QGIS server (2.2) is using:
424  // <Attribute value="My description" name="desc"/>
425  QString name = readAttribute( QStringLiteral( "name" ), attr );
426  //QgsDebugMsg ( "attribute name = " + name );
427  if ( localName.compare( QLatin1String( "attribute" ), Qt::CaseInsensitive ) == 0
428  && !name.isEmpty() )
429  {
430  QString value = readAttribute( QStringLiteral( "value" ), attr );
431  //QgsDebugMsg ( "attribute value = " + value );
432  addAttribute( name, value );
433  }
434  else
435  {
436  mAttributeName = localName;
437  mParseModeStack.push( QgsGmlSchema::Attribute );
438  mStringCash.clear();
439  }
440  }
441 }
442 
443 void QgsGmlSchema::endElement( const XML_Char *el )
444 {
445  QString elementName = QString::fromUtf8( el );
446  QgsDebugMsgLevel( QString( "<- %1 %2" ).arg( mLevel ).arg( elementName ), 5 );
447 
448  if ( mLevel >= mSkipLevel )
449  {
450  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
451  mLevel--;
452  return;
453  }
454  else
455  {
456  // clear possible skip level
457  mSkipLevel = std::numeric_limits<int>::max();
458  }
459 
460  QStringList splitName = elementName.split( NS_SEPARATOR );
461  QString localName = splitName.last();
462  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
463 
464  QgsGmlSchema::ParseMode parseMode = modeStackTop();
465 
466  if ( parseMode == QgsGmlSchema::FeatureMembers )
467  {
468  modeStackPop();
469  }
470  else if ( parseMode == QgsGmlSchema::Attribute && localName == mAttributeName )
471  {
472  // End of attribute
473  //QgsDebugMsg("end attribute");
474  modeStackPop(); // go up to feature
475 
476  if ( mFeatureClassMap[mCurrentFeatureName].geometryAttributes().count( mAttributeName ) == 0 )
477  {
478  addAttribute( mAttributeName, mStringCash );
479  }
480  }
481  else if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
482  {
483  // was skipped
484  }
485  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
486  {
487  modeStackPop();
488  }
489  mParsePathStack.removeLast();
490  mLevel--;
491 }
492 
493 void QgsGmlSchema::characters( const XML_Char *chars, int len )
494 {
495  //QgsDebugMsg( QString("level %1 : %2").arg( mLevel ).arg( QString::fromUtf8( chars, len ) ) );
496  if ( mLevel >= mSkipLevel )
497  {
498  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
499  return;
500  }
501 
502  //save chars in mStringCash attribute mode for value type analysis
503  if ( modeStackTop() == QgsGmlSchema::Attribute )
504  {
505  mStringCash.append( QString::fromUtf8( chars, len ) );
506  }
507 }
508 
509 void QgsGmlSchema::addAttribute( const QString &name, const QString &value )
510 {
511  // It is not geometry attribute -> analyze value
512  bool ok;
513  value.toInt( &ok );
514  QVariant::Type type = QVariant::String;
515  if ( ok )
516  {
517  type = QVariant::Int;
518  }
519  else
520  {
521  value.toDouble( &ok );
522  if ( ok )
523  {
524  type = QVariant::Double;
525  }
526  }
527  //QgsDebugMsg( "mStringCash = " + mStringCash + " type = " + QVariant::typeToName( type ) );
528  //QMap<QString, QgsField> & fields = mFeatureClassMap[mCurrentFeatureName].fields();
529  QList<QgsField> &fields = mFeatureClassMap[mCurrentFeatureName].fields();
530  int fieldIndex = mFeatureClassMap[mCurrentFeatureName].fieldIndex( name );
531  if ( fieldIndex == -1 )
532  {
533  QgsField field( name, type );
534  fields.append( field );
535  }
536  else
537  {
538  QgsField &field = fields[fieldIndex];
539  // check if type is sufficient
540  if ( ( field.type() == QVariant::Int && ( type == QVariant::String || type == QVariant::Double ) ) ||
541  ( field.type() == QVariant::Double && type == QVariant::String ) )
542  {
543  field.setType( type );
544  }
545  }
546 }
547 
548 QStringList QgsGmlSchema::typeNames() const
549 {
550  return mFeatureClassMap.keys();
551 }
552 
553 QList<QgsField> QgsGmlSchema::fields( const QString &typeName )
554 {
555  if ( mFeatureClassMap.count( typeName ) == 0 ) return QList<QgsField>();
556  return mFeatureClassMap[typeName].fields();
557 }
558 
559 QStringList QgsGmlSchema::geometryAttributes( const QString &typeName )
560 {
561  if ( mFeatureClassMap.count( typeName ) == 0 ) return QStringList();
562  return mFeatureClassMap[typeName].geometryAttributes();
563 }
bool guessSchema(const QByteArray &data)
Guess GML schema from data if XSD does not exist.
#define QgsDebugMsg(str)
Definition: qgslogger.h:38
const QString GML_NAMESPACE
QList< QgsField > & fields()
Definition: qgsgmlschema.h:50
#define QgsDebugMsgLevel(str, level)
Definition: qgslogger.h:39
QList< QgsField > fields(const QString &typeName)
Gets fields for type/class name parsed from GML or XSD.
void append(const QString &message, const QString &tag)
Append new error message.
Definition: qgserror.cpp:39
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:48
Description of feature class in GML.
Definition: qgsgmlschema.h:40
void setType(QVariant::Type type)
Set variant type.
Definition: qgsfield.cpp:140
QgsError is container for error messages (report).
Definition: qgserror.h:80
const char NS_SEPARATOR
QStringList & geometryAttributes()
Definition: qgsgmlschema.h:56
QgsGmlFeatureClass()=default
Constructor for QgsGmlFeatureClass.
QVariant::Type type
Definition: qgsfield.h:55
QStringList typeNames() const
Gets list of dot separated paths to feature classes parsed from GML or XSD.
int fieldIndex(const QString &name)
bool parseXSD(const QByteArray &xml)
Gets fields info from XSD.
QStringList geometryAttributes(const QString &typeName)
Gets list of geometry attributes for type/class name.