QGIS API Documentation  3.26.3-Buenos Aires (65e4edfdad)
qgsstringutils.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsstringutils.cpp
3  ------------------
4  begin : June 2015
5  copyright : (C) 2015 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 
16 #include "qgsstringutils.h"
17 #include "qgslogger.h"
18 #include <QVector>
19 #include <QStringList>
20 #include <QTextBoundaryFinder>
21 #include <QRegularExpression>
22 #include <cstdlib> // for std::abs
23 
24 QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
25 {
26  if ( string.isEmpty() )
27  return QString();
28 
29  switch ( capitalization )
30  {
33  return string;
34 
36  return string.toUpper();
37 
40  return string.toLower();
41 
43  {
44  QString temp = string;
45 
46  QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
47  QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
48 
49  wordSplitter.setPosition( 0 );
50  bool first = true;
51  while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
52  || wordSplitter.toNextBoundary() >= 0 )
53  {
54  first = false;
55  letterSplitter.setPosition( wordSplitter.position() );
56  letterSplitter.toNextBoundary();
57  QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
58  temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
59  }
60  return temp;
61  }
62 
64  {
65  // yes, this is MASSIVELY simplifying the problem!!
66 
67  static QStringList smallWords;
68  static QStringList newPhraseSeparators;
69  static QRegularExpression splitWords;
70  if ( smallWords.empty() )
71  {
72  smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
73  newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
74  splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
75  }
76 
77  const bool allSameCase = string.toLower() == string || string.toUpper() == string;
78 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
79  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, QString::SkipEmptyParts );
80 #else
81  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
82 #endif
83  QString result;
84  bool firstWord = true;
85  int i = 0;
86  int lastWord = parts.count() - 1;
87  for ( const QString &word : std::as_const( parts ) )
88  {
89  if ( newPhraseSeparators.contains( word.trimmed() ) )
90  {
91  firstWord = true;
92  result += word;
93  }
94  else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
95  {
96  result += word.at( 0 ).toUpper() + word.mid( 1 );
97  firstWord = false;
98  }
99  else
100  {
101  result += word;
102  }
103  i++;
104  }
105  return result;
106  }
107 
109  QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
110  result.remove( ' ' );
111  return result;
112  }
113  // no warnings
114  return string;
115 }
116 
117 // original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
118 QString QgsStringUtils::ampersandEncode( const QString &string )
119 {
120  QString encoded;
121  for ( int i = 0; i < string.size(); ++i )
122  {
123  QChar ch = string.at( i );
124  if ( ch.unicode() > 160 )
125  encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
126  else if ( ch.unicode() == 38 )
127  encoded += QLatin1String( "&amp;" );
128  else if ( ch.unicode() == 60 )
129  encoded += QLatin1String( "&lt;" );
130  else if ( ch.unicode() == 62 )
131  encoded += QLatin1String( "&gt;" );
132  else
133  encoded += ch;
134  }
135  return encoded;
136 }
137 
138 int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
139 {
140  int length1 = string1.length();
141  int length2 = string2.length();
142 
143  //empty strings? solution is trivial...
144  if ( string1.isEmpty() )
145  {
146  return length2;
147  }
148  else if ( string2.isEmpty() )
149  {
150  return length1;
151  }
152 
153  //handle case sensitive flag (or not)
154  QString s1( caseSensitive ? string1 : string1.toLower() );
155  QString s2( caseSensitive ? string2 : string2.toLower() );
156 
157  const QChar *s1Char = s1.constData();
158  const QChar *s2Char = s2.constData();
159 
160  //strip out any common prefix
161  int commonPrefixLen = 0;
162  while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
163  {
164  commonPrefixLen++;
165  length1--;
166  length2--;
167  s1Char++;
168  s2Char++;
169  }
170 
171  //strip out any common suffix
172  while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
173  {
174  length1--;
175  length2--;
176  }
177 
178  //fully checked either string? if so, the answer is easy...
179  if ( length1 == 0 )
180  {
181  return length2;
182  }
183  else if ( length2 == 0 )
184  {
185  return length1;
186  }
187 
188  //ensure the inner loop is longer
189  if ( length1 > length2 )
190  {
191  std::swap( s1, s2 );
192  std::swap( length1, length2 );
193  }
194 
195  //levenshtein algorithm begins here
196  QVector< int > col;
197  col.fill( 0, length2 + 1 );
198  QVector< int > prevCol;
199  prevCol.reserve( length2 + 1 );
200  for ( int i = 0; i < length2 + 1; ++i )
201  {
202  prevCol << i;
203  }
204  const QChar *s2start = s2Char;
205  for ( int i = 0; i < length1; ++i )
206  {
207  col[0] = i + 1;
208  s2Char = s2start;
209  for ( int j = 0; j < length2; ++j )
210  {
211  col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
212  s2Char++;
213  }
214  col.swap( prevCol );
215  s1Char++;
216  }
217  return prevCol[length2];
218 }
219 
220 QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
221 {
222  if ( string1.isEmpty() || string2.isEmpty() )
223  {
224  //empty strings, solution is trivial...
225  return QString();
226  }
227 
228  //handle case sensitive flag (or not)
229  QString s1( caseSensitive ? string1 : string1.toLower() );
230  QString s2( caseSensitive ? string2 : string2.toLower() );
231 
232  if ( s1 == s2 )
233  {
234  //another trivial case, identical strings
235  return s1;
236  }
237 
238  int *currentScores = new int [ s2.length()];
239  int *previousScores = new int [ s2.length()];
240  int maxCommonLength = 0;
241  int lastMaxBeginIndex = 0;
242 
243  const QChar *s1Char = s1.constData();
244  const QChar *s2Char = s2.constData();
245  const QChar *s2Start = s2Char;
246 
247  for ( int i = 0; i < s1.length(); ++i )
248  {
249  for ( int j = 0; j < s2.length(); ++j )
250  {
251  if ( *s1Char != *s2Char )
252  {
253  currentScores[j] = 0;
254  }
255  else
256  {
257  if ( i == 0 || j == 0 )
258  {
259  currentScores[j] = 1;
260  }
261  else
262  {
263  currentScores[j] = 1 + previousScores[j - 1];
264  }
265 
266  if ( maxCommonLength < currentScores[j] )
267  {
268  maxCommonLength = currentScores[j];
269  lastMaxBeginIndex = i;
270  }
271  }
272  s2Char++;
273  }
274  std::swap( currentScores, previousScores );
275  s1Char++;
276  s2Char = s2Start;
277  }
278  delete [] currentScores;
279  delete [] previousScores;
280  return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
281 }
282 
283 int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
284 {
285  if ( string1.isEmpty() && string2.isEmpty() )
286  {
287  //empty strings, solution is trivial...
288  return 0;
289  }
290 
291  if ( string1.length() != string2.length() )
292  {
293  //invalid inputs
294  return -1;
295  }
296 
297  //handle case sensitive flag (or not)
298  QString s1( caseSensitive ? string1 : string1.toLower() );
299  QString s2( caseSensitive ? string2 : string2.toLower() );
300 
301  if ( s1 == s2 )
302  {
303  //another trivial case, identical strings
304  return 0;
305  }
306 
307  int distance = 0;
308  const QChar *s1Char = s1.constData();
309  const QChar *s2Char = s2.constData();
310 
311  for ( int i = 0; i < string1.length(); ++i )
312  {
313  if ( *s1Char != *s2Char )
314  distance++;
315  s1Char++;
316  s2Char++;
317  }
318 
319  return distance;
320 }
321 
322 QString QgsStringUtils::soundex( const QString &string )
323 {
324  if ( string.isEmpty() )
325  return QString();
326 
327  QString tmp = string.toUpper();
328 
329  //strip non character codes, and vowel like characters after the first character
330  QChar *char1 = tmp.data();
331  QChar *char2 = tmp.data();
332  int outLen = 0;
333  for ( int i = 0; i < tmp.length(); ++i, ++char2 )
334  {
335  if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
336  && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
337  && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
338  && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
339  {
340  *char1 = *char2;
341  char1++;
342  outLen++;
343  }
344  }
345  tmp.truncate( outLen );
346 
347  QChar *tmpChar = tmp.data();
348  tmpChar++;
349  for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
350  {
351  switch ( ( *tmpChar ).unicode() )
352  {
353  case 0x42:
354  case 0x46:
355  case 0x50:
356  case 0x56:
357  tmp.replace( i, 1, QChar( 0x31 ) );
358  break;
359 
360  case 0x43:
361  case 0x47:
362  case 0x4A:
363  case 0x4B:
364  case 0x51:
365  case 0x53:
366  case 0x58:
367  case 0x5A:
368  tmp.replace( i, 1, QChar( 0x32 ) );
369  break;
370 
371  case 0x44:
372  case 0x54:
373  tmp.replace( i, 1, QChar( 0x33 ) );
374  break;
375 
376  case 0x4C:
377  tmp.replace( i, 1, QChar( 0x34 ) );
378  break;
379 
380  case 0x4D:
381  case 0x4E:
382  tmp.replace( i, 1, QChar( 0x35 ) );
383  break;
384 
385  case 0x52:
386  tmp.replace( i, 1, QChar( 0x36 ) );
387  break;
388  }
389  }
390 
391  //remove adjacent duplicates
392  char1 = tmp.data();
393  char2 = tmp.data();
394  char2++;
395  outLen = 1;
396  for ( int i = 1; i < tmp.length(); ++i, ++char2 )
397  {
398  if ( *char2 != *char1 )
399  {
400  char1++;
401  *char1 = *char2;
402  outLen++;
403  if ( outLen == 4 )
404  break;
405  }
406  }
407  tmp.truncate( outLen );
408  if ( tmp.length() < 4 )
409  {
410  tmp.append( "000" );
411  tmp.truncate( 4 );
412  }
413 
414  return tmp;
415 }
416 
417 
418 double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
419 {
420  QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
421  QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
422 
423  int candidateLength = candidateNormalized.length();
424  int searchLength = searchNormalized.length();
425  int score = 0;
426 
427  // if the candidate and the search term are empty, no other option than 0 score
428  if ( candidateLength == 0 || searchLength == 0 )
429  return score;
430 
431  int candidateIdx = 0;
432  int searchIdx = 0;
433  // there is always at least one word
434  int maxScore = FUZZY_SCORE_WORD_MATCH;
435 
436  bool isPreviousIndexMatching = false;
437  bool isWordOpen = true;
438 
439  // loop trough each candidate char and calculate the potential max score
440  while ( candidateIdx < candidateLength )
441  {
442  QChar candidateChar = candidateNormalized[ candidateIdx++ ];
443  bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
444 
445  // the first char is always the default score
446  if ( candidateIdx == 1 )
447  maxScore += FUZZY_SCORE_NEW_MATCH;
448  // every space character or underscore is a opportunity for a new word
449  else if ( isCandidateCharWordEnd )
450  maxScore += FUZZY_SCORE_WORD_MATCH;
451  // potentially we can match every other character
452  else
453  maxScore += FUZZY_SCORE_CONSECUTIVE_MATCH;
454 
455  // we looped through all the characters
456  if ( searchIdx >= searchLength )
457  continue;
458 
459  QChar searchChar = searchNormalized[ searchIdx ];
460  bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
461 
462  // match!
463  if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
464  {
465  searchIdx++;
466 
467  // if we have just successfully finished a word, give higher score
468  if ( isSearchCharWordEnd )
469  {
470  if ( isWordOpen )
471  score += FUZZY_SCORE_WORD_MATCH;
472  else if ( isPreviousIndexMatching )
474  else
475  score += FUZZY_SCORE_NEW_MATCH;
476 
477  isWordOpen = true;
478  }
479  // if we have consecutive characters matching, give higher score
480  else if ( isPreviousIndexMatching )
481  {
483  }
484  // normal score for new independent character that matches
485  else
486  {
487  score += FUZZY_SCORE_NEW_MATCH;
488  }
489 
490  isPreviousIndexMatching = true;
491  }
492  // if the current character does NOT match, we are sure we cannot build a word for now
493  else
494  {
495  isPreviousIndexMatching = false;
496  isWordOpen = false;
497  }
498 
499  // if the search string is covered, check if the last match is end of word
500  if ( searchIdx >= searchLength )
501  {
502  bool isEndOfWord = ( candidateIdx >= candidateLength )
503  ? true
504  : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
505 
506  if ( isEndOfWord )
507  score += FUZZY_SCORE_WORD_MATCH;
508  }
509 
510  // QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
511  }
512 
513  // QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
514  // we didn't loop through all the search chars, it means, that they are not present in the current candidate
515  if ( searchIdx < searchLength )
516  score = 0;
517 
518  return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
519 }
520 
521 
522 QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
523 {
524  QString converted = string;
525 
526  // http://alanstorm.com/url_regex_explained
527  // note - there's more robust implementations available
528  static thread_local QRegularExpression urlRegEx( QStringLiteral( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/))))" ) );
529  static thread_local QRegularExpression protoRegEx( QStringLiteral( "^(?:f|ht)tps?://|file://" ) );
530  static thread_local QRegularExpression emailRegEx( QStringLiteral( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
531 
532  int offset = 0;
533  bool found = false;
534  QRegularExpressionMatch match = urlRegEx.match( converted );
535  while ( match.hasMatch() )
536  {
537  found = true;
538  QString url = match.captured( 1 );
539  QString protoUrl = url;
540  if ( !protoRegEx.match( protoUrl ).hasMatch() )
541  {
542  protoUrl.prepend( "http://" );
543  }
544  QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
545  converted.replace( match.capturedStart( 1 ), url.length(), anchor );
546  offset = match.capturedStart( 1 ) + anchor.length();
547  match = urlRegEx.match( converted, offset );
548  }
549 
550  offset = 0;
551  match = emailRegEx.match( converted );
552  while ( match.hasMatch() )
553  {
554  found = true;
555  QString email = match.captured( 1 );
556  QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
557  converted.replace( match.capturedStart( 1 ), email.length(), anchor );
558  offset = match.capturedStart( 1 ) + anchor.length();
559  match = emailRegEx.match( converted, offset );
560  }
561 
562  if ( foundLinks )
563  *foundLinks = found;
564 
565  return converted;
566 }
567 
568 bool QgsStringUtils::isUrl( const QString &string )
569 {
570  const thread_local QRegularExpression rxUrl( QStringLiteral( "^(http|https|ftp|file)://\\S+$" ) );
571  return rxUrl.match( string ).hasMatch();
572 }
573 
574 QString QgsStringUtils::htmlToMarkdown( const QString &html )
575 {
576  // Any changes in this function must be copied to qgscrashreport.cpp too
577  QString converted = html;
578  converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
579  converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
580  converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
581  converted.replace( QLatin1String( "<pre>" ), QLatin1String( "\n```\n" ) );
582  converted.replace( QLatin1String( "</pre>" ), QLatin1String( "```\n" ) );
583 
584  static thread_local QRegularExpression hrefRegEx( QStringLiteral( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
585 
586  int offset = 0;
587  QRegularExpressionMatch match = hrefRegEx.match( converted );
588  while ( match.hasMatch() )
589  {
590  QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
591  url.replace( '\'', QString() );
592  QString name = match.captured( 2 );
593  QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
594  converted.replace( match.capturedStart(), match.capturedLength(), anchor );
595  offset = match.capturedStart() + anchor.length();
596  match = hrefRegEx.match( converted, offset );
597  }
598 
599  return converted;
600 }
601 
602 QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
603 {
604  if ( string.isEmpty() || length == 0 )
605  return string;
606 
607  QString newstr;
608  QRegularExpression rx;
609  int delimiterLength = 0;
610 
611  if ( !customDelimiter.isEmpty() )
612  {
613  rx.setPattern( QRegularExpression::escape( customDelimiter ) );
614  delimiterLength = customDelimiter.length();
615  }
616  else
617  {
618  // \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
619  rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
620  delimiterLength = 1;
621  }
622 
623  const QStringList lines = string.split( '\n' );
624  int strLength, strCurrent, strHit, lastHit;
625 
626  for ( int i = 0; i < lines.size(); i++ )
627  {
628  const QString line = lines.at( i );
629  strLength = line.length();
630  if ( strLength <= length )
631  {
632  // shortcut, no wrapping required
633  newstr.append( line );
634  if ( i < lines.size() - 1 )
635  newstr.append( '\n' );
636  continue;
637  }
638  strCurrent = 0;
639  strHit = 0;
640  lastHit = 0;
641 
642  while ( strCurrent < strLength )
643  {
644  // positive wrap value = desired maximum line width to wrap
645  // negative wrap value = desired minimum line width before wrap
646  if ( useMaxLineLength )
647  {
648  //first try to locate delimiter backwards
649  strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
650  if ( strHit == lastHit || strHit == -1 )
651  {
652  //if no new backward delimiter found, try to locate forward
653  strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
654  }
655  lastHit = strHit;
656  }
657  else
658  {
659  strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
660  }
661  if ( strHit > -1 )
662  {
663 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
664  newstr.append( line.midRef( strCurrent, strHit - strCurrent ) );
665 #else
666  newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
667 #endif
668  newstr.append( '\n' );
669  strCurrent = strHit + delimiterLength;
670  }
671  else
672  {
673 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
674  newstr.append( line.midRef( strCurrent ) );
675 #else
676  newstr.append( QStringView {line} .mid( strCurrent ) );
677 #endif
678  strCurrent = strLength;
679  }
680  }
681  if ( i < lines.size() - 1 )
682  newstr.append( '\n' );
683  }
684 
685  return newstr;
686 }
687 
689 {
690  string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
691  string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
692  string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
693  string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
694  string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
695  string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
696  string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
697  string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
698  string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
699  string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
700  string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
701  string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
702  string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
703  string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
704  string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
705  string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
706  string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
707  return string;
708 }
709 
710 QString QgsStringUtils::qRegExpEscape( const QString &string )
711 {
712  // code and logic taken from the Qt source code
713  const QLatin1Char backslash( '\\' );
714  const int count = string.count();
715 
716  QString escaped;
717  escaped.reserve( count * 2 );
718  for ( int i = 0; i < count; i++ )
719  {
720  switch ( string.at( i ).toLatin1() )
721  {
722  case '$':
723  case '(':
724  case ')':
725  case '*':
726  case '+':
727  case '.':
728  case '?':
729  case '[':
730  case '\\':
731  case ']':
732  case '^':
733  case '{':
734  case '|':
735  case '}':
736  escaped.append( backslash );
737  }
738  escaped.append( string.at( i ) );
739  }
740  return escaped;
741 }
742 
743 QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
744 {
745  const int charactersToTruncate = string.length() - maxLength;
746  if ( charactersToTruncate <= 0 )
747  return string;
748 
749  // note we actually truncate an extra character, as we'll be replacing it with the ... character
750  const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
751 
752 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
753  return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) + string.midRef( truncateFrom + charactersToTruncate + 1 );
754 #else
755  return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
756 #endif
757 }
758 
759 QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
760  : mMatch( match )
761  , mReplacement( replacement )
762  , mCaseSensitive( caseSensitive )
763  , mWholeWordOnly( wholeWordOnly )
764 {
765  if ( mWholeWordOnly )
766  {
767  mRx.setPattern( QStringLiteral( "\\b%1\\b" ).arg( mMatch ) );
768  mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
769  }
770 }
771 
772 QString QgsStringReplacement::process( const QString &input ) const
773 {
774  QString result = input;
775  if ( !mWholeWordOnly )
776  {
777  return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
778  }
779  else
780  {
781  return result.replace( mRx, mReplacement );
782  }
783 }
784 
786 {
787  QgsStringMap map;
788  map.insert( QStringLiteral( "match" ), mMatch );
789  map.insert( QStringLiteral( "replace" ), mReplacement );
790  map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
791  map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
792  return map;
793 }
794 
796 {
797  return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
798  properties.value( QStringLiteral( "replace" ) ),
799  properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
800  properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
801 }
802 
803 QString QgsStringReplacementCollection::process( const QString &input ) const
804 {
805  QString result = input;
806  for ( const QgsStringReplacement &r : mReplacements )
807  {
808  result = r.process( result );
809  }
810  return result;
811 }
812 
813 void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
814 {
815  for ( const QgsStringReplacement &r : mReplacements )
816  {
817  QgsStringMap props = r.properties();
818  QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
819  QgsStringMap::const_iterator it = props.constBegin();
820  for ( ; it != props.constEnd(); ++it )
821  {
822  propEl.setAttribute( it.key(), it.value() );
823  }
824  elem.appendChild( propEl );
825  }
826 }
827 
828 void QgsStringReplacementCollection::readXml( const QDomElement &elem )
829 {
830  mReplacements.clear();
831  QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
832  for ( int i = 0; i < nodelist.count(); i++ )
833  {
834  QDomElement replacementElem = nodelist.at( i ).toElement();
835  QDomNamedNodeMap nodeMap = replacementElem.attributes();
836 
837  QgsStringMap props;
838  for ( int j = 0; j < nodeMap.count(); ++j )
839  {
840  props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
841  }
842  mReplacements << QgsStringReplacement::fromProperties( props );
843  }
844 
845 }
QgsStringUtils::insertLinks
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
Definition: qgsstringutils.cpp:522
QgsStringReplacementCollection::readXml
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
Definition: qgsstringutils.cpp:828
QgsStringUtils::capitalize
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
Definition: qgsstringutils.cpp:24
Qgis::Capitalization::AllUppercase
@ AllUppercase
Convert all characters to uppercase.
qgsstringutils.h
QgsStringUtils::ampersandEncode
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
Definition: qgsstringutils.cpp:118
QgsStringUtils::hammingDistance
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
Definition: qgsstringutils.cpp:283
QgsStringUtils::truncateMiddleOfString
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
Definition: qgsstringutils.cpp:743
QgsStringUtils::qRegExpEscape
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
Definition: qgsstringutils.cpp:710
FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_WORD_MATCH
Definition: qgsstringutils.h:27
Qgis::Capitalization::TitleCase
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
QgsStringReplacement::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
Definition: qgsstringutils.cpp:772
QgsStringUtils::substituteVerticalCharacters
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
Definition: qgsstringutils.cpp:688
QgsStringReplacementCollection::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
Definition: qgsstringutils.cpp:803
Qgis::Capitalization::AllLowercase
@ AllLowercase
Convert all characters to lowercase.
Qgis::Capitalization::MixedCase
@ MixedCase
Mixed case, ie no change.
Qgis::Capitalization
Capitalization
String capitalization options.
Definition: qgis.h:1592
QgsStringReplacement
A representation of a single string replacement.
Definition: qgsstringutils.h:38
Qgis::Capitalization::SmallCaps
@ SmallCaps
Mixed case small caps (since QGIS 3.24)
QgsStringUtils::fuzzyScore
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
Definition: qgsstringutils.cpp:418
Qgis::Capitalization::AllSmallCaps
@ AllSmallCaps
Force all characters to small caps (since QGIS 3.24)
QgsStringReplacement::QgsStringReplacement
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
Definition: qgsstringutils.cpp:759
Qgis::Capitalization::ForceFirstLetterToCapital
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
QgsStringUtils::soundex
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
Definition: qgsstringutils.cpp:322
QgsStringMap
QMap< QString, QString > QgsStringMap
Definition: qgis.h:2781
QgsStringUtils::wordWrap
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
Definition: qgsstringutils.cpp:602
QgsStringUtils::isUrl
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file)
Definition: qgsstringutils.cpp:568
Qgis::Capitalization::UpperCamelCase
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
FUZZY_SCORE_NEW_MATCH
#define FUZZY_SCORE_NEW_MATCH
Definition: qgsstringutils.h:28
QgsStringUtils::levenshteinDistance
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
Definition: qgsstringutils.cpp:138
QgsStringReplacement::fromProperties
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
Definition: qgsstringutils.cpp:795
QgsStringReplacementCollection::writeXml
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
Definition: qgsstringutils.cpp:813
qgslogger.h
FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_CONSECUTIVE_MATCH
Definition: qgsstringutils.h:29
QgsStringReplacement::properties
QgsStringMap properties() const
Returns a map of the replacement properties.
Definition: qgsstringutils.cpp:785
QgsStringUtils::htmlToMarkdown
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
Definition: qgsstringutils.cpp:574
QgsStringUtils::longestCommonSubstring
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
Definition: qgsstringutils.cpp:220