QGIS API Documentation 3.34.0-Prizren (ffbdd678812)
Loading...
Searching...
No Matches
qgsstringutils.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsstringutils.cpp
3 ------------------
4 begin : June 2015
5 copyright : (C) 2015 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#include "qgsstringutils.h"
17#include "qgslogger.h"
18#include <QVector>
19#include <QStringList>
20#include <QTextBoundaryFinder>
21#include <QRegularExpression>
22#include <cstdlib> // for std::abs
23
24QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
25{
26 if ( string.isEmpty() )
27 return QString();
28
29 switch ( capitalization )
30 {
33 return string;
34
36 return string.toUpper();
37
40 return string.toLower();
41
43 {
44 QString temp = string;
45
46 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
47 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
48
49 wordSplitter.setPosition( 0 );
50 bool first = true;
51 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
52 || wordSplitter.toNextBoundary() >= 0 )
53 {
54 first = false;
55 letterSplitter.setPosition( wordSplitter.position() );
56 letterSplitter.toNextBoundary();
57 QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
58 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
59 }
60 return temp;
61 }
62
64 {
65 // yes, this is MASSIVELY simplifying the problem!!
66
67 static QStringList smallWords;
68 static QStringList newPhraseSeparators;
69 static QRegularExpression splitWords;
70 if ( smallWords.empty() )
71 {
72 smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
73 newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
74 splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
75 }
76
77 const bool allSameCase = string.toLower() == string || string.toUpper() == string;
78#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
79 const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, QString::SkipEmptyParts );
80#else
81 const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
82#endif
83 QString result;
84 bool firstWord = true;
85 int i = 0;
86 int lastWord = parts.count() - 1;
87 for ( const QString &word : std::as_const( parts ) )
88 {
89 if ( newPhraseSeparators.contains( word.trimmed() ) )
90 {
91 firstWord = true;
92 result += word;
93 }
94 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
95 {
96 result += word.at( 0 ).toUpper() + word.mid( 1 );
97 firstWord = false;
98 }
99 else
100 {
101 result += word;
102 }
103 i++;
104 }
105 return result;
106 }
107
109 QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
110 result.remove( ' ' );
111 return result;
112 }
113 // no warnings
114 return string;
115}
116
117// original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
118QString QgsStringUtils::ampersandEncode( const QString &string )
119{
120 QString encoded;
121 for ( int i = 0; i < string.size(); ++i )
122 {
123 QChar ch = string.at( i );
124 if ( ch.unicode() > 160 )
125 encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
126 else if ( ch.unicode() == 38 )
127 encoded += QLatin1String( "&amp;" );
128 else if ( ch.unicode() == 60 )
129 encoded += QLatin1String( "&lt;" );
130 else if ( ch.unicode() == 62 )
131 encoded += QLatin1String( "&gt;" );
132 else
133 encoded += ch;
134 }
135 return encoded;
136}
137
138int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
139{
140 int length1 = string1.length();
141 int length2 = string2.length();
142
143 //empty strings? solution is trivial...
144 if ( string1.isEmpty() )
145 {
146 return length2;
147 }
148 else if ( string2.isEmpty() )
149 {
150 return length1;
151 }
152
153 //handle case sensitive flag (or not)
154 QString s1( caseSensitive ? string1 : string1.toLower() );
155 QString s2( caseSensitive ? string2 : string2.toLower() );
156
157 const QChar *s1Char = s1.constData();
158 const QChar *s2Char = s2.constData();
159
160 //strip out any common prefix
161 int commonPrefixLen = 0;
162 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
163 {
164 commonPrefixLen++;
165 length1--;
166 length2--;
167 s1Char++;
168 s2Char++;
169 }
170
171 //strip out any common suffix
172 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
173 {
174 length1--;
175 length2--;
176 }
177
178 //fully checked either string? if so, the answer is easy...
179 if ( length1 == 0 )
180 {
181 return length2;
182 }
183 else if ( length2 == 0 )
184 {
185 return length1;
186 }
187
188 //ensure the inner loop is longer
189 if ( length1 > length2 )
190 {
191 std::swap( s1, s2 );
192 std::swap( length1, length2 );
193 }
194
195 //levenshtein algorithm begins here
196 std::vector< int > col( length2 + 1, 0 );
197 std::vector< int > prevCol;
198 prevCol.reserve( length2 + 1 );
199 for ( int i = 0; i < length2 + 1; ++i )
200 {
201 prevCol.emplace_back( i );
202 }
203 const QChar *s2start = s2Char;
204 for ( int i = 0; i < length1; ++i )
205 {
206 col[0] = i + 1;
207 s2Char = s2start;
208 for ( int j = 0; j < length2; ++j )
209 {
210 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
211 s2Char++;
212 }
213 col.swap( prevCol );
214 s1Char++;
215 }
216 return prevCol[length2];
217}
218
219QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
220{
221 if ( string1.isEmpty() || string2.isEmpty() )
222 {
223 //empty strings, solution is trivial...
224 return QString();
225 }
226
227 //handle case sensitive flag (or not)
228 QString s1( caseSensitive ? string1 : string1.toLower() );
229 QString s2( caseSensitive ? string2 : string2.toLower() );
230
231 if ( s1 == s2 )
232 {
233 //another trivial case, identical strings
234 return s1;
235 }
236
237 int *currentScores = new int [ s2.length()];
238 int *previousScores = new int [ s2.length()];
239 int maxCommonLength = 0;
240 int lastMaxBeginIndex = 0;
241
242 const QChar *s1Char = s1.constData();
243 const QChar *s2Char = s2.constData();
244 const QChar *s2Start = s2Char;
245
246 for ( int i = 0; i < s1.length(); ++i )
247 {
248 for ( int j = 0; j < s2.length(); ++j )
249 {
250 if ( *s1Char != *s2Char )
251 {
252 currentScores[j] = 0;
253 }
254 else
255 {
256 if ( i == 0 || j == 0 )
257 {
258 currentScores[j] = 1;
259 }
260 else
261 {
262 currentScores[j] = 1 + previousScores[j - 1];
263 }
264
265 if ( maxCommonLength < currentScores[j] )
266 {
267 maxCommonLength = currentScores[j];
268 lastMaxBeginIndex = i;
269 }
270 }
271 s2Char++;
272 }
273 std::swap( currentScores, previousScores );
274 s1Char++;
275 s2Char = s2Start;
276 }
277 delete [] currentScores;
278 delete [] previousScores;
279 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
280}
281
282int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
283{
284 if ( string1.isEmpty() && string2.isEmpty() )
285 {
286 //empty strings, solution is trivial...
287 return 0;
288 }
289
290 if ( string1.length() != string2.length() )
291 {
292 //invalid inputs
293 return -1;
294 }
295
296 //handle case sensitive flag (or not)
297 QString s1( caseSensitive ? string1 : string1.toLower() );
298 QString s2( caseSensitive ? string2 : string2.toLower() );
299
300 if ( s1 == s2 )
301 {
302 //another trivial case, identical strings
303 return 0;
304 }
305
306 int distance = 0;
307 const QChar *s1Char = s1.constData();
308 const QChar *s2Char = s2.constData();
309
310 for ( int i = 0; i < string1.length(); ++i )
311 {
312 if ( *s1Char != *s2Char )
313 distance++;
314 s1Char++;
315 s2Char++;
316 }
317
318 return distance;
319}
320
321QString QgsStringUtils::soundex( const QString &string )
322{
323 if ( string.isEmpty() )
324 return QString();
325
326 QString tmp = string.toUpper();
327
328 //strip non character codes, and vowel like characters after the first character
329 QChar *char1 = tmp.data();
330 QChar *char2 = tmp.data();
331 int outLen = 0;
332 for ( int i = 0; i < tmp.length(); ++i, ++char2 )
333 {
334 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
335 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
336 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
337 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
338 {
339 *char1 = *char2;
340 char1++;
341 outLen++;
342 }
343 }
344 tmp.truncate( outLen );
345
346 QChar *tmpChar = tmp.data();
347 tmpChar++;
348 for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
349 {
350 switch ( ( *tmpChar ).unicode() )
351 {
352 case 0x42:
353 case 0x46:
354 case 0x50:
355 case 0x56:
356 tmp.replace( i, 1, QChar( 0x31 ) );
357 break;
358
359 case 0x43:
360 case 0x47:
361 case 0x4A:
362 case 0x4B:
363 case 0x51:
364 case 0x53:
365 case 0x58:
366 case 0x5A:
367 tmp.replace( i, 1, QChar( 0x32 ) );
368 break;
369
370 case 0x44:
371 case 0x54:
372 tmp.replace( i, 1, QChar( 0x33 ) );
373 break;
374
375 case 0x4C:
376 tmp.replace( i, 1, QChar( 0x34 ) );
377 break;
378
379 case 0x4D:
380 case 0x4E:
381 tmp.replace( i, 1, QChar( 0x35 ) );
382 break;
383
384 case 0x52:
385 tmp.replace( i, 1, QChar( 0x36 ) );
386 break;
387 }
388 }
389
390 //remove adjacent duplicates
391 char1 = tmp.data();
392 char2 = tmp.data();
393 char2++;
394 outLen = 1;
395 for ( int i = 1; i < tmp.length(); ++i, ++char2 )
396 {
397 if ( *char2 != *char1 )
398 {
399 char1++;
400 *char1 = *char2;
401 outLen++;
402 if ( outLen == 4 )
403 break;
404 }
405 }
406 tmp.truncate( outLen );
407 if ( tmp.length() < 4 )
408 {
409 tmp.append( "000" );
410 tmp.truncate( 4 );
411 }
412
413 return tmp;
414}
415
416
417double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
418{
419 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
420 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
421
422 int candidateLength = candidateNormalized.length();
423 int searchLength = searchNormalized.length();
424 int score = 0;
425
426 // if the candidate and the search term are empty, no other option than 0 score
427 if ( candidateLength == 0 || searchLength == 0 )
428 return score;
429
430 int candidateIdx = 0;
431 int searchIdx = 0;
432 // there is always at least one word
433 int maxScore = FUZZY_SCORE_WORD_MATCH;
434
435 bool isPreviousIndexMatching = false;
436 bool isWordOpen = true;
437
438 // loop trough each candidate char and calculate the potential max score
439 while ( candidateIdx < candidateLength )
440 {
441 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
442 bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
443
444 // the first char is always the default score
445 if ( candidateIdx == 1 )
446 maxScore += FUZZY_SCORE_NEW_MATCH;
447 // every space character or underscore is a opportunity for a new word
448 else if ( isCandidateCharWordEnd )
449 maxScore += FUZZY_SCORE_WORD_MATCH;
450 // potentially we can match every other character
451 else
453
454 // we looped through all the characters
455 if ( searchIdx >= searchLength )
456 continue;
457
458 QChar searchChar = searchNormalized[ searchIdx ];
459 bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
460
461 // match!
462 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
463 {
464 searchIdx++;
465
466 // if we have just successfully finished a word, give higher score
467 if ( isSearchCharWordEnd )
468 {
469 if ( isWordOpen )
470 score += FUZZY_SCORE_WORD_MATCH;
471 else if ( isPreviousIndexMatching )
473 else
474 score += FUZZY_SCORE_NEW_MATCH;
475
476 isWordOpen = true;
477 }
478 // if we have consecutive characters matching, give higher score
479 else if ( isPreviousIndexMatching )
480 {
482 }
483 // normal score for new independent character that matches
484 else
485 {
486 score += FUZZY_SCORE_NEW_MATCH;
487 }
488
489 isPreviousIndexMatching = true;
490 }
491 // if the current character does NOT match, we are sure we cannot build a word for now
492 else
493 {
494 isPreviousIndexMatching = false;
495 isWordOpen = false;
496 }
497
498 // if the search string is covered, check if the last match is end of word
499 if ( searchIdx >= searchLength )
500 {
501 bool isEndOfWord = ( candidateIdx >= candidateLength )
502 ? true
503 : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
504
505 if ( isEndOfWord )
506 score += FUZZY_SCORE_WORD_MATCH;
507 }
508
509 // QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
510 }
511
512 // QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
513 // we didn't loop through all the search chars, it means, that they are not present in the current candidate
514 if ( searchIdx < searchLength )
515 score = 0;
516
517 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
518}
519
520
521QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
522{
523 QString converted = string;
524
525 // http://alanstorm.com/url_regex_explained
526 // note - there's more robust implementations available
527 const thread_local QRegularExpression urlRegEx( QStringLiteral( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/))))" ) );
528 const thread_local QRegularExpression protoRegEx( QStringLiteral( "^(?:f|ht)tps?://|file://" ) );
529 const thread_local QRegularExpression emailRegEx( QStringLiteral( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
530
531 int offset = 0;
532 bool found = false;
533 QRegularExpressionMatch match = urlRegEx.match( converted );
534 while ( match.hasMatch() )
535 {
536 found = true;
537 QString url = match.captured( 1 );
538 QString protoUrl = url;
539 if ( !protoRegEx.match( protoUrl ).hasMatch() )
540 {
541 protoUrl.prepend( "http://" );
542 }
543 QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
544 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
545 offset = match.capturedStart( 1 ) + anchor.length();
546 match = urlRegEx.match( converted, offset );
547 }
548
549 offset = 0;
550 match = emailRegEx.match( converted );
551 while ( match.hasMatch() )
552 {
553 found = true;
554 QString email = match.captured( 1 );
555 QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
556 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
557 offset = match.capturedStart( 1 ) + anchor.length();
558 match = emailRegEx.match( converted, offset );
559 }
560
561 if ( foundLinks )
562 *foundLinks = found;
563
564 return converted;
565}
566
567bool QgsStringUtils::isUrl( const QString &string )
568{
569 const thread_local QRegularExpression rxUrl( QStringLiteral( "^(http|https|ftp|file)://\\S+$" ) );
570 return rxUrl.match( string ).hasMatch();
571}
572
573QString QgsStringUtils::htmlToMarkdown( const QString &html )
574{
575 // Any changes in this function must be copied to qgscrashreport.cpp too
576 QString converted = html;
577 converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
578 converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
579 converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
580 converted.replace( QLatin1String( "<pre>" ), QLatin1String( "\n```\n" ) );
581 converted.replace( QLatin1String( "</pre>" ), QLatin1String( "```\n" ) );
582
583 const thread_local QRegularExpression hrefRegEx( QStringLiteral( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
584
585 int offset = 0;
586 QRegularExpressionMatch match = hrefRegEx.match( converted );
587 while ( match.hasMatch() )
588 {
589 QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
590 url.replace( '\'', QString() );
591 QString name = match.captured( 2 );
592 QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
593 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
594 offset = match.capturedStart() + anchor.length();
595 match = hrefRegEx.match( converted, offset );
596 }
597
598 return converted;
599}
600
601QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
602{
603 if ( string.isEmpty() || length == 0 )
604 return string;
605
606 QString newstr;
607 QRegularExpression rx;
608 int delimiterLength = 0;
609
610 if ( !customDelimiter.isEmpty() )
611 {
612 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
613 delimiterLength = customDelimiter.length();
614 }
615 else
616 {
617 // \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
618 rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
619 delimiterLength = 1;
620 }
621
622 const QStringList lines = string.split( '\n' );
623 int strLength, strCurrent, strHit, lastHit;
624
625 for ( int i = 0; i < lines.size(); i++ )
626 {
627 const QString line = lines.at( i );
628 strLength = line.length();
629 if ( strLength <= length )
630 {
631 // shortcut, no wrapping required
632 newstr.append( line );
633 if ( i < lines.size() - 1 )
634 newstr.append( '\n' );
635 continue;
636 }
637 strCurrent = 0;
638 strHit = 0;
639 lastHit = 0;
640
641 while ( strCurrent < strLength )
642 {
643 // positive wrap value = desired maximum line width to wrap
644 // negative wrap value = desired minimum line width before wrap
645 if ( useMaxLineLength )
646 {
647 //first try to locate delimiter backwards
648 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
649 if ( strHit == lastHit || strHit == -1 )
650 {
651 //if no new backward delimiter found, try to locate forward
652 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
653 }
654 lastHit = strHit;
655 }
656 else
657 {
658 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
659 }
660 if ( strHit > -1 )
661 {
662#if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
663 newstr.append( line.midRef( strCurrent, strHit - strCurrent ) );
664#else
665 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
666#endif
667 newstr.append( '\n' );
668 strCurrent = strHit + delimiterLength;
669 }
670 else
671 {
672#if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
673 newstr.append( line.midRef( strCurrent ) );
674#else
675 newstr.append( QStringView {line} .mid( strCurrent ) );
676#endif
677 strCurrent = strLength;
678 }
679 }
680 if ( i < lines.size() - 1 )
681 newstr.append( '\n' );
682 }
683
684 return newstr;
685}
686
688{
689 string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
690 string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
691 string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
692 string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
693 string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
694 string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
695 string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
696 string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
697 string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
698 string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
699 string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
700 string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
701 string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
702 string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
703 string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
704 string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
705 string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
706 return string;
707}
708
709QString QgsStringUtils::qRegExpEscape( const QString &string )
710{
711 // code and logic taken from the Qt source code
712 const QLatin1Char backslash( '\\' );
713 const int count = string.count();
714
715 QString escaped;
716 escaped.reserve( count * 2 );
717 for ( int i = 0; i < count; i++ )
718 {
719 switch ( string.at( i ).toLatin1() )
720 {
721 case '$':
722 case '(':
723 case ')':
724 case '*':
725 case '+':
726 case '.':
727 case '?':
728 case '[':
729 case '\\':
730 case ']':
731 case '^':
732 case '{':
733 case '|':
734 case '}':
735 escaped.append( backslash );
736 }
737 escaped.append( string.at( i ) );
738 }
739 return escaped;
740}
741
742QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
743{
744 const int charactersToTruncate = string.length() - maxLength;
745 if ( charactersToTruncate <= 0 )
746 return string;
747
748 // note we actually truncate an extra character, as we'll be replacing it with the ... character
749 const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
750
751#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
752 return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) + string.midRef( truncateFrom + charactersToTruncate + 1 );
753#else
754 return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
755#endif
756}
757
758QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
759 : mMatch( match )
760 , mReplacement( replacement )
761 , mCaseSensitive( caseSensitive )
762 , mWholeWordOnly( wholeWordOnly )
763{
764 if ( mWholeWordOnly )
765 {
766 mRx.setPattern( QStringLiteral( "\\b%1\\b" ).arg( mMatch ) );
767 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
768 }
769}
770
771QString QgsStringReplacement::process( const QString &input ) const
772{
773 QString result = input;
774 if ( !mWholeWordOnly )
775 {
776 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
777 }
778 else
779 {
780 return result.replace( mRx, mReplacement );
781 }
782}
783
785{
786 QgsStringMap map;
787 map.insert( QStringLiteral( "match" ), mMatch );
788 map.insert( QStringLiteral( "replace" ), mReplacement );
789 map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
790 map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
791 return map;
792}
793
795{
796 return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
797 properties.value( QStringLiteral( "replace" ) ),
798 properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
799 properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
800}
801
802QString QgsStringReplacementCollection::process( const QString &input ) const
803{
804 QString result = input;
805 for ( const QgsStringReplacement &r : mReplacements )
806 {
807 result = r.process( result );
808 }
809 return result;
810}
811
812void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
813{
814 for ( const QgsStringReplacement &r : mReplacements )
815 {
816 QgsStringMap props = r.properties();
817 QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
818 QgsStringMap::const_iterator it = props.constBegin();
819 for ( ; it != props.constEnd(); ++it )
820 {
821 propEl.setAttribute( it.key(), it.value() );
822 }
823 elem.appendChild( propEl );
824 }
825}
826
827void QgsStringReplacementCollection::readXml( const QDomElement &elem )
828{
829 mReplacements.clear();
830 QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
831 for ( int i = 0; i < nodelist.count(); i++ )
832 {
833 QDomElement replacementElem = nodelist.at( i ).toElement();
834 QDomNamedNodeMap nodeMap = replacementElem.attributes();
835
836 QgsStringMap props;
837 for ( int j = 0; j < nodeMap.count(); ++j )
838 {
839 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
840 }
841 mReplacements << QgsStringReplacement::fromProperties( props );
842 }
843
844}
Capitalization
String capitalization options.
Definition qgis.h:2493
@ AllSmallCaps
Force all characters to small caps (since QGIS 3.24)
@ MixedCase
Mixed case, ie no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ SmallCaps
Mixed case small caps (since QGIS 3.24)
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http, https, ftp, file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
QMap< QString, QString > QgsStringMap
Definition qgis.h:4877
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH