QGIS API Documentation  3.16.0-Hannover (43b64b13f3)
qgsstringutils.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsstringutils.cpp
3  ------------------
4  begin : June 2015
5  copyright : (C) 2015 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 
16 #include "qgsstringutils.h"
17 #include "qgslogger.h"
18 #include <QVector>
19 #include <QRegExp>
20 #include <QStringList>
21 #include <QTextBoundaryFinder>
22 #include <QRegularExpression>
23 #include <cstdlib> // for std::abs
24 
25 QString QgsStringUtils::capitalize( const QString &string, QgsStringUtils::Capitalization capitalization )
26 {
27  if ( string.isEmpty() )
28  return QString();
29 
30  switch ( capitalization )
31  {
32  case MixedCase:
33  return string;
34 
35  case AllUppercase:
36  return string.toUpper();
37 
38  case AllLowercase:
39  return string.toLower();
40 
42  {
43  QString temp = string;
44 
45  QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
46  QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
47 
48  wordSplitter.setPosition( 0 );
49  bool first = true;
50  while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
51  || wordSplitter.toNextBoundary() >= 0 )
52  {
53  first = false;
54  letterSplitter.setPosition( wordSplitter.position() );
55  letterSplitter.toNextBoundary();
56  QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
57  temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
58  }
59  return temp;
60  }
61 
62  case TitleCase:
63  {
64  // yes, this is MASSIVELY simplifying the problem!!
65 
66  static QStringList smallWords;
67  static QStringList newPhraseSeparators;
68  static QRegularExpression splitWords;
69  if ( smallWords.empty() )
70  {
71  smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
72  newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
73  splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
74  }
75 
76  const bool allSameCase = string.toLower() == string || string.toUpper() == string;
77 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
78  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, QString::SkipEmptyParts );
79 #else
80  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
81 #endif
82  QString result;
83  bool firstWord = true;
84  int i = 0;
85  int lastWord = parts.count() - 1;
86  for ( const QString &word : qgis::as_const( parts ) )
87  {
88  if ( newPhraseSeparators.contains( word.trimmed() ) )
89  {
90  firstWord = true;
91  result += word;
92  }
93  else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
94  {
95  result += word.at( 0 ).toUpper() + word.mid( 1 );
96  firstWord = false;
97  }
98  else
99  {
100  result += word;
101  }
102  i++;
103  }
104  return result;
105  }
106 
107  case UpperCamelCase:
108  QString result = QgsStringUtils::capitalize( string.toLower(), QgsStringUtils::ForceFirstLetterToCapital ).simplified();
109  result.remove( ' ' );
110  return result;
111  }
112  // no warnings
113  return string;
114 }
115 
116 // original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
117 QString QgsStringUtils::ampersandEncode( const QString &string )
118 {
119  QString encoded;
120  for ( int i = 0; i < string.size(); ++i )
121  {
122  QChar ch = string.at( i );
123  if ( ch.unicode() > 160 )
124  encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
125  else if ( ch.unicode() == 38 )
126  encoded += QLatin1String( "&amp;" );
127  else if ( ch.unicode() == 60 )
128  encoded += QLatin1String( "&lt;" );
129  else if ( ch.unicode() == 62 )
130  encoded += QLatin1String( "&gt;" );
131  else
132  encoded += ch;
133  }
134  return encoded;
135 }
136 
137 int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
138 {
139  int length1 = string1.length();
140  int length2 = string2.length();
141 
142  //empty strings? solution is trivial...
143  if ( string1.isEmpty() )
144  {
145  return length2;
146  }
147  else if ( string2.isEmpty() )
148  {
149  return length1;
150  }
151 
152  //handle case sensitive flag (or not)
153  QString s1( caseSensitive ? string1 : string1.toLower() );
154  QString s2( caseSensitive ? string2 : string2.toLower() );
155 
156  const QChar *s1Char = s1.constData();
157  const QChar *s2Char = s2.constData();
158 
159  //strip out any common prefix
160  int commonPrefixLen = 0;
161  while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
162  {
163  commonPrefixLen++;
164  length1--;
165  length2--;
166  s1Char++;
167  s2Char++;
168  }
169 
170  //strip out any common suffix
171  while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
172  {
173  length1--;
174  length2--;
175  }
176 
177  //fully checked either string? if so, the answer is easy...
178  if ( length1 == 0 )
179  {
180  return length2;
181  }
182  else if ( length2 == 0 )
183  {
184  return length1;
185  }
186 
187  //ensure the inner loop is longer
188  if ( length1 > length2 )
189  {
190  std::swap( s1, s2 );
191  std::swap( length1, length2 );
192  }
193 
194  //levenshtein algorithm begins here
195  QVector< int > col;
196  col.fill( 0, length2 + 1 );
197  QVector< int > prevCol;
198  prevCol.reserve( length2 + 1 );
199  for ( int i = 0; i < length2 + 1; ++i )
200  {
201  prevCol << i;
202  }
203  const QChar *s2start = s2Char;
204  for ( int i = 0; i < length1; ++i )
205  {
206  col[0] = i + 1;
207  s2Char = s2start;
208  for ( int j = 0; j < length2; ++j )
209  {
210  col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
211  s2Char++;
212  }
213  col.swap( prevCol );
214  s1Char++;
215  }
216  return prevCol[length2];
217 }
218 
219 QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
220 {
221  if ( string1.isEmpty() || string2.isEmpty() )
222  {
223  //empty strings, solution is trivial...
224  return QString();
225  }
226 
227  //handle case sensitive flag (or not)
228  QString s1( caseSensitive ? string1 : string1.toLower() );
229  QString s2( caseSensitive ? string2 : string2.toLower() );
230 
231  if ( s1 == s2 )
232  {
233  //another trivial case, identical strings
234  return s1;
235  }
236 
237  int *currentScores = new int [ s2.length()];
238  int *previousScores = new int [ s2.length()];
239  int maxCommonLength = 0;
240  int lastMaxBeginIndex = 0;
241 
242  const QChar *s1Char = s1.constData();
243  const QChar *s2Char = s2.constData();
244  const QChar *s2Start = s2Char;
245 
246  for ( int i = 0; i < s1.length(); ++i )
247  {
248  for ( int j = 0; j < s2.length(); ++j )
249  {
250  if ( *s1Char != *s2Char )
251  {
252  currentScores[j] = 0;
253  }
254  else
255  {
256  if ( i == 0 || j == 0 )
257  {
258  currentScores[j] = 1;
259  }
260  else
261  {
262  currentScores[j] = 1 + previousScores[j - 1];
263  }
264 
265  if ( maxCommonLength < currentScores[j] )
266  {
267  maxCommonLength = currentScores[j];
268  lastMaxBeginIndex = i;
269  }
270  }
271  s2Char++;
272  }
273  std::swap( currentScores, previousScores );
274  s1Char++;
275  s2Char = s2Start;
276  }
277  delete [] currentScores;
278  delete [] previousScores;
279  return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
280 }
281 
282 int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
283 {
284  if ( string1.isEmpty() && string2.isEmpty() )
285  {
286  //empty strings, solution is trivial...
287  return 0;
288  }
289 
290  if ( string1.length() != string2.length() )
291  {
292  //invalid inputs
293  return -1;
294  }
295 
296  //handle case sensitive flag (or not)
297  QString s1( caseSensitive ? string1 : string1.toLower() );
298  QString s2( caseSensitive ? string2 : string2.toLower() );
299 
300  if ( s1 == s2 )
301  {
302  //another trivial case, identical strings
303  return 0;
304  }
305 
306  int distance = 0;
307  const QChar *s1Char = s1.constData();
308  const QChar *s2Char = s2.constData();
309 
310  for ( int i = 0; i < string1.length(); ++i )
311  {
312  if ( *s1Char != *s2Char )
313  distance++;
314  s1Char++;
315  s2Char++;
316  }
317 
318  return distance;
319 }
320 
321 QString QgsStringUtils::soundex( const QString &string )
322 {
323  if ( string.isEmpty() )
324  return QString();
325 
326  QString tmp = string.toUpper();
327 
328  //strip non character codes, and vowel like characters after the first character
329  QChar *char1 = tmp.data();
330  QChar *char2 = tmp.data();
331  int outLen = 0;
332  for ( int i = 0; i < tmp.length(); ++i, ++char2 )
333  {
334  if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
335  && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
336  && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
337  && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
338  {
339  *char1 = *char2;
340  char1++;
341  outLen++;
342  }
343  }
344  tmp.truncate( outLen );
345 
346  QChar *tmpChar = tmp.data();
347  tmpChar++;
348  for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
349  {
350  switch ( ( *tmpChar ).unicode() )
351  {
352  case 0x42:
353  case 0x46:
354  case 0x50:
355  case 0x56:
356  tmp.replace( i, 1, QChar( 0x31 ) );
357  break;
358 
359  case 0x43:
360  case 0x47:
361  case 0x4A:
362  case 0x4B:
363  case 0x51:
364  case 0x53:
365  case 0x58:
366  case 0x5A:
367  tmp.replace( i, 1, QChar( 0x32 ) );
368  break;
369 
370  case 0x44:
371  case 0x54:
372  tmp.replace( i, 1, QChar( 0x33 ) );
373  break;
374 
375  case 0x4C:
376  tmp.replace( i, 1, QChar( 0x34 ) );
377  break;
378 
379  case 0x4D:
380  case 0x4E:
381  tmp.replace( i, 1, QChar( 0x35 ) );
382  break;
383 
384  case 0x52:
385  tmp.replace( i, 1, QChar( 0x36 ) );
386  break;
387  }
388  }
389 
390  //remove adjacent duplicates
391  char1 = tmp.data();
392  char2 = tmp.data();
393  char2++;
394  outLen = 1;
395  for ( int i = 1; i < tmp.length(); ++i, ++char2 )
396  {
397  if ( *char2 != *char1 )
398  {
399  char1++;
400  *char1 = *char2;
401  outLen++;
402  if ( outLen == 4 )
403  break;
404  }
405  }
406  tmp.truncate( outLen );
407  if ( tmp.length() < 4 )
408  {
409  tmp.append( "000" );
410  tmp.truncate( 4 );
411  }
412 
413  return tmp;
414 }
415 
416 
417 double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
418 {
419  QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
420  QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
421 
422  int candidateLength = candidateNormalized.length();
423  int searchLength = searchNormalized.length();
424  int score = 0;
425 
426  // if the candidate and the search term are empty, no other option than 0 score
427  if ( candidateLength == 0 || searchLength == 0 )
428  return score;
429 
430  int candidateIdx = 0;
431  int searchIdx = 0;
432  // there is always at least one word
433  int maxScore = FUZZY_SCORE_WORD_MATCH;
434 
435  bool isPreviousIndexMatching = false;
436  bool isWordOpen = true;
437 
438  // loop trough each candidate char and calculate the potential max score
439  while ( candidateIdx < candidateLength )
440  {
441  QChar candidateChar = candidateNormalized[ candidateIdx++ ];
442  bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
443 
444  // the first char is always the default score
445  if ( candidateIdx == 1 )
446  maxScore += FUZZY_SCORE_NEW_MATCH;
447  // every space character or underscore is a opportunity for a new word
448  else if ( isCandidateCharWordEnd )
449  maxScore += FUZZY_SCORE_WORD_MATCH;
450  // potentially we can match every other character
451  else
452  maxScore += FUZZY_SCORE_CONSECUTIVE_MATCH;
453 
454  // we looped through all the characters
455  if ( searchIdx >= searchLength )
456  continue;
457 
458  QChar searchChar = searchNormalized[ searchIdx ];
459  bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
460 
461  // match!
462  if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
463  {
464  searchIdx++;
465 
466  // if we have just successfully finished a word, give higher score
467  if ( isSearchCharWordEnd )
468  {
469  if ( isWordOpen )
470  score += FUZZY_SCORE_WORD_MATCH;
471  else if ( isPreviousIndexMatching )
473  else
474  score += FUZZY_SCORE_NEW_MATCH;
475 
476  isWordOpen = true;
477  }
478  // if we have consecutive characters matching, give higher score
479  else if ( isPreviousIndexMatching )
480  {
482  }
483  // normal score for new independent character that matches
484  else
485  {
486  score += FUZZY_SCORE_NEW_MATCH;
487  }
488 
489  isPreviousIndexMatching = true;
490  }
491  // if the current character does NOT match, we are sure we cannot build a word for now
492  else
493  {
494  isPreviousIndexMatching = false;
495  isWordOpen = false;
496  }
497 
498  // if the search string is covered, check if the last match is end of word
499  if ( searchIdx >= searchLength )
500  {
501  bool isEndOfWord = ( candidateIdx >= candidateLength )
502  ? true
503  : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
504 
505  if ( isEndOfWord )
506  score += FUZZY_SCORE_WORD_MATCH;
507  }
508 
509  // QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
510  }
511 
512  // QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
513  // we didn't loop through all the search chars, it means, that they are not present in the current candidate
514  if ( searchIdx < searchLength )
515  score = 0;
516 
517  return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
518 }
519 
520 
521 QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
522 {
523  QString converted = string;
524 
525  // http://alanstorm.com/url_regex_explained
526  // note - there's more robust implementations available, but we need one which works within the limitation of QRegExp
527  static QRegExp urlRegEx( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/))))" );
528  static QRegExp protoRegEx( "^(?:f|ht)tps?://|file://" );
529  static QRegExp emailRegEx( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" );
530 
531  int offset = 0;
532  bool found = false;
533  while ( urlRegEx.indexIn( converted, offset ) != -1 )
534  {
535  found = true;
536  QString url = urlRegEx.cap( 1 );
537  QString protoUrl = url;
538  if ( protoRegEx.indexIn( protoUrl ) == -1 )
539  {
540  protoUrl.prepend( "http://" );
541  }
542  QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
543  converted.replace( urlRegEx.pos( 1 ), url.length(), anchor );
544  offset = urlRegEx.pos( 1 ) + anchor.length();
545  }
546  offset = 0;
547  while ( emailRegEx.indexIn( converted, offset ) != -1 )
548  {
549  found = true;
550  QString email = emailRegEx.cap( 1 );
551  QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
552  converted.replace( emailRegEx.pos( 1 ), email.length(), anchor );
553  offset = emailRegEx.pos( 1 ) + anchor.length();
554  }
555 
556  if ( foundLinks )
557  *foundLinks = found;
558 
559  return converted;
560 }
561 
562 QString QgsStringUtils::htmlToMarkdown( const QString &html )
563 {
564  // Any changes in this function must be copied to qgscrashreport.cpp too
565  QString converted = html;
566  converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
567  converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
568  converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
569 
570  static QRegExp hrefRegEx( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" );
571  int offset = 0;
572  while ( hrefRegEx.indexIn( converted, offset ) != -1 )
573  {
574  QString url = hrefRegEx.cap( 1 ).replace( QLatin1String( "\"" ), QString() );
575  url.replace( '\'', QString() );
576  QString name = hrefRegEx.cap( 2 );
577  QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
578  converted.replace( hrefRegEx, anchor );
579  offset = hrefRegEx.pos( 1 ) + anchor.length();
580  }
581 
582  return converted;
583 }
584 
585 QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
586 {
587  if ( string.isEmpty() || length == 0 )
588  return string;
589 
590  QString newstr;
591  QRegExp rx;
592  int delimiterLength = 0;
593 
594  if ( !customDelimiter.isEmpty() )
595  {
596  rx.setPatternSyntax( QRegExp::FixedString );
597  rx.setPattern( customDelimiter );
598  delimiterLength = customDelimiter.length();
599  }
600  else
601  {
602  // \x200B is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
603  rx.setPattern( QStringLiteral( "[\\s\\x200B]" ) );
604  delimiterLength = 1;
605  }
606 
607  const QStringList lines = string.split( '\n' );
608  int strLength, strCurrent, strHit, lastHit;
609 
610  for ( int i = 0; i < lines.size(); i++ )
611  {
612  strLength = lines.at( i ).length();
613  strCurrent = 0;
614  strHit = 0;
615  lastHit = 0;
616 
617  while ( strCurrent < strLength )
618  {
619  // positive wrap value = desired maximum line width to wrap
620  // negative wrap value = desired minimum line width before wrap
621  if ( useMaxLineLength )
622  {
623  //first try to locate delimiter backwards
624  strHit = lines.at( i ).lastIndexOf( rx, strCurrent + length );
625  if ( strHit == lastHit || strHit == -1 )
626  {
627  //if no new backward delimiter found, try to locate forward
628  strHit = lines.at( i ).indexOf( rx, strCurrent + std::abs( length ) );
629  }
630  lastHit = strHit;
631  }
632  else
633  {
634  strHit = lines.at( i ).indexOf( rx, strCurrent + std::abs( length ) );
635  }
636  if ( strHit > -1 )
637  {
638  newstr.append( lines.at( i ).midRef( strCurrent, strHit - strCurrent ) );
639  newstr.append( '\n' );
640  strCurrent = strHit + delimiterLength;
641  }
642  else
643  {
644  newstr.append( lines.at( i ).midRef( strCurrent ) );
645  strCurrent = strLength;
646  }
647  }
648  if ( i < lines.size() - 1 )
649  newstr.append( '\n' );
650  }
651 
652  return newstr;
653 }
654 
656 {
657  string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
658  string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
659  string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
660  string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
661  string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
662  string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
663  string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
664  string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
665  string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
666  string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
667  string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
668  string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
669  string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
670  string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
671  string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
672  string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
673  string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
674  return string;
675 }
676 
677 QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
678  : mMatch( match )
679  , mReplacement( replacement )
680  , mCaseSensitive( caseSensitive )
681  , mWholeWordOnly( wholeWordOnly )
682 {
683  if ( mWholeWordOnly )
684  mRx = QRegExp( QString( "\\b%1\\b" ).arg( mMatch ),
685  mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
686 }
687 
688 QString QgsStringReplacement::process( const QString &input ) const
689 {
690  QString result = input;
691  if ( !mWholeWordOnly )
692  {
693  return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
694  }
695  else
696  {
697  return result.replace( mRx, mReplacement );
698  }
699 }
700 
702 {
703  QgsStringMap map;
704  map.insert( QStringLiteral( "match" ), mMatch );
705  map.insert( QStringLiteral( "replace" ), mReplacement );
706  map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? "1" : "0" );
707  map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? "1" : "0" );
708  return map;
709 }
710 
712 {
713  return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
714  properties.value( QStringLiteral( "replace" ) ),
715  properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
716  properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
717 }
718 
719 QString QgsStringReplacementCollection::process( const QString &input ) const
720 {
721  QString result = input;
722  const auto constMReplacements = mReplacements;
723  for ( const QgsStringReplacement &r : constMReplacements )
724  {
725  result = r.process( result );
726  }
727  return result;
728 }
729 
730 void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
731 {
732  const auto constMReplacements = mReplacements;
733  for ( const QgsStringReplacement &r : constMReplacements )
734  {
735  QgsStringMap props = r.properties();
736  QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
737  QgsStringMap::const_iterator it = props.constBegin();
738  for ( ; it != props.constEnd(); ++it )
739  {
740  propEl.setAttribute( it.key(), it.value() );
741  }
742  elem.appendChild( propEl );
743  }
744 }
745 
746 void QgsStringReplacementCollection::readXml( const QDomElement &elem )
747 {
748  mReplacements.clear();
749  QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
750  for ( int i = 0; i < nodelist.count(); i++ )
751  {
752  QDomElement replacementElem = nodelist.at( i ).toElement();
753  QDomNamedNodeMap nodeMap = replacementElem.attributes();
754 
755  QgsStringMap props;
756  for ( int j = 0; j < nodeMap.count(); ++j )
757  {
758  props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
759  }
760  mReplacements << QgsStringReplacement::fromProperties( props );
761  }
762 
763 }
QgsStringUtils::UpperCamelCase
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
Definition: qgsstringutils.h:195
QgsStringUtils::insertLinks
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
Definition: qgsstringutils.cpp:521
QgsStringReplacementCollection::readXml
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
Definition: qgsstringutils.cpp:746
qgsstringutils.h
QgsStringUtils::ampersandEncode
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
Definition: qgsstringutils.cpp:117
QgsStringUtils::hammingDistance
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
Definition: qgsstringutils.cpp:282
FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_WORD_MATCH
Definition: qgsstringutils.h:27
QgsStringUtils::Capitalization
Capitalization
Capitalization options.
Definition: qgsstringutils.h:189
QgsStringReplacement::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
Definition: qgsstringutils.cpp:688
QgsStringUtils::MixedCase
@ MixedCase
Mixed case, ie no change.
Definition: qgsstringutils.h:190
QgsStringUtils::substituteVerticalCharacters
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
Definition: qgsstringutils.cpp:655
QgsStringReplacementCollection::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
Definition: qgsstringutils.cpp:719
QgsStringUtils::AllLowercase
@ AllLowercase
Convert all characters to lowercase.
Definition: qgsstringutils.h:192
QgsStringReplacement
A representation of a single string replacement.
Definition: qgsstringutils.h:39
QgsStringUtils::fuzzyScore
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
Definition: qgsstringutils.cpp:417
QgsStringUtils::capitalize
static QString capitalize(const QString &string, Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
Definition: qgsstringutils.cpp:25
QgsStringReplacement::QgsStringReplacement
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
Definition: qgsstringutils.cpp:677
QgsStringUtils::TitleCase
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
Definition: qgsstringutils.h:194
QgsStringUtils::soundex
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
Definition: qgsstringutils.cpp:321
QgsStringMap
QMap< QString, QString > QgsStringMap
Definition: qgis.h:758
QgsStringUtils::ForceFirstLetterToCapital
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
Definition: qgsstringutils.h:193
QgsStringUtils::wordWrap
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
Definition: qgsstringutils.cpp:585
FUZZY_SCORE_NEW_MATCH
#define FUZZY_SCORE_NEW_MATCH
Definition: qgsstringutils.h:28
QgsStringUtils::levenshteinDistance
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
Definition: qgsstringutils.cpp:137
QgsStringReplacement::fromProperties
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
Definition: qgsstringutils.cpp:711
QgsStringReplacementCollection::writeXml
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
Definition: qgsstringutils.cpp:730
qgslogger.h
FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_CONSECUTIVE_MATCH
Definition: qgsstringutils.h:29
QgsStringReplacement::properties
QgsStringMap properties() const
Returns a map of the replacement properties.
Definition: qgsstringutils.cpp:701
QgsStringUtils::AllUppercase
@ AllUppercase
Convert all characters to uppercase.
Definition: qgsstringutils.h:191
QgsStringUtils::htmlToMarkdown
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
Definition: qgsstringutils.cpp:562
QgsStringUtils::longestCommonSubstring
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
Definition: qgsstringutils.cpp:219