QGIS API Documentation 3.99.0-Master (26c88405ac0)
Loading...
Searching...
No Matches
qgsstringutils.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsstringutils.cpp
3 ------------------
4 begin : June 2015
5 copyright : (C) 2015 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#include "qgsstringutils.h"
17
18#include <cstdlib>
19
20#include "qgslogger.h"
21
22#include <QRegularExpression>
23#include <QStringList>
24#include <QTextBoundaryFinder>
25#include <QVector>
26
27QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
28{
29 if ( string.isEmpty() )
30 return QString();
31
32 switch ( capitalization )
33 {
36 return string;
37
39 return string.toUpper();
40
43 return string.toLower();
44
46 {
47 QString temp = string;
48
49 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
50 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
51
52 wordSplitter.setPosition( 0 );
53 bool first = true;
54 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
55 || wordSplitter.toNextBoundary() >= 0 )
56 {
57 first = false;
58 letterSplitter.setPosition( wordSplitter.position() );
59 ( void )letterSplitter.toNextBoundary();
60 QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
61 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
62 }
63 return temp;
64 }
65
67 {
68 // yes, this is MASSIVELY simplifying the problem!!
69
70 static QStringList smallWords;
71 static QStringList newPhraseSeparators;
72 static QRegularExpression splitWords;
73 if ( smallWords.empty() )
74 {
75 smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
76 newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
77 splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
78 }
79
80 const bool allSameCase = string.toLower() == string || string.toUpper() == string;
81 const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
82 QString result;
83 bool firstWord = true;
84 int i = 0;
85 int lastWord = parts.count() - 1;
86 for ( const QString &word : std::as_const( parts ) )
87 {
88 if ( newPhraseSeparators.contains( word.trimmed() ) )
89 {
90 firstWord = true;
91 result += word;
92 }
93 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
94 {
95 result += word.at( 0 ).toUpper() + word.mid( 1 );
96 firstWord = false;
97 }
98 else
99 {
100 result += word;
101 }
102 i++;
103 }
104 return result;
105 }
106
108 QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
109 result.remove( ' ' );
110 return result;
111 }
112 // no warnings
113 return string;
114}
115
116// original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
117QString QgsStringUtils::ampersandEncode( const QString &string )
118{
119 QString encoded;
120 for ( int i = 0; i < string.size(); ++i )
121 {
122 QChar ch = string.at( i );
123 if ( ch.unicode() > 160 )
124 encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
125 else if ( ch.unicode() == 38 )
126 encoded += QLatin1String( "&amp;" );
127 else if ( ch.unicode() == 60 )
128 encoded += QLatin1String( "&lt;" );
129 else if ( ch.unicode() == 62 )
130 encoded += QLatin1String( "&gt;" );
131 else
132 encoded += ch;
133 }
134 return encoded;
135}
136
137int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
138{
139 int length1 = string1.length();
140 int length2 = string2.length();
141
142 //empty strings? solution is trivial...
143 if ( string1.isEmpty() )
144 {
145 return length2;
146 }
147 else if ( string2.isEmpty() )
148 {
149 return length1;
150 }
151
152 //handle case sensitive flag (or not)
153 QString s1( caseSensitive ? string1 : string1.toLower() );
154 QString s2( caseSensitive ? string2 : string2.toLower() );
155
156 const QChar *s1Char = s1.constData();
157 const QChar *s2Char = s2.constData();
158
159 //strip out any common prefix
160 int commonPrefixLen = 0;
161 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
162 {
163 commonPrefixLen++;
164 length1--;
165 length2--;
166 s1Char++;
167 s2Char++;
168 }
169
170 //strip out any common suffix
171 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
172 {
173 length1--;
174 length2--;
175 }
176
177 //fully checked either string? if so, the answer is easy...
178 if ( length1 == 0 )
179 {
180 return length2;
181 }
182 else if ( length2 == 0 )
183 {
184 return length1;
185 }
186
187 //ensure the inner loop is longer
188 if ( length1 > length2 )
189 {
190 std::swap( s1, s2 );
191 std::swap( length1, length2 );
192 }
193
194 //levenshtein algorithm begins here
195 std::vector< int > col( length2 + 1, 0 );
196 std::vector< int > prevCol;
197 prevCol.reserve( length2 + 1 );
198 for ( int i = 0; i < length2 + 1; ++i )
199 {
200 prevCol.emplace_back( i );
201 }
202 const QChar *s2start = s2Char;
203 for ( int i = 0; i < length1; ++i )
204 {
205 col[0] = i + 1;
206 s2Char = s2start;
207 for ( int j = 0; j < length2; ++j )
208 {
209 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
210 s2Char++;
211 }
212 col.swap( prevCol );
213 s1Char++;
214 }
215 return prevCol[length2];
216}
217
218QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
219{
220 if ( string1.isEmpty() || string2.isEmpty() )
221 {
222 //empty strings, solution is trivial...
223 return QString();
224 }
225
226 //handle case sensitive flag (or not)
227 QString s1( caseSensitive ? string1 : string1.toLower() );
228 QString s2( caseSensitive ? string2 : string2.toLower() );
229
230 if ( s1 == s2 )
231 {
232 //another trivial case, identical strings
233 return s1;
234 }
235
236 int *currentScores = new int [ s2.length()];
237 int *previousScores = new int [ s2.length()];
238 int maxCommonLength = 0;
239 int lastMaxBeginIndex = 0;
240
241 const QChar *s1Char = s1.constData();
242 const QChar *s2Char = s2.constData();
243 const QChar *s2Start = s2Char;
244
245 for ( int i = 0; i < s1.length(); ++i )
246 {
247 for ( int j = 0; j < s2.length(); ++j )
248 {
249 if ( *s1Char != *s2Char )
250 {
251 currentScores[j] = 0;
252 }
253 else
254 {
255 if ( i == 0 || j == 0 )
256 {
257 currentScores[j] = 1;
258 }
259 else
260 {
261 currentScores[j] = 1 + previousScores[j - 1];
262 }
263
264 if ( maxCommonLength < currentScores[j] )
265 {
266 maxCommonLength = currentScores[j];
267 lastMaxBeginIndex = i;
268 }
269 }
270 s2Char++;
271 }
272 std::swap( currentScores, previousScores );
273 s1Char++;
274 s2Char = s2Start;
275 }
276 delete [] currentScores;
277 delete [] previousScores;
278 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
279}
280
281int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
282{
283 if ( string1.isEmpty() && string2.isEmpty() )
284 {
285 //empty strings, solution is trivial...
286 return 0;
287 }
288
289 if ( string1.length() != string2.length() )
290 {
291 //invalid inputs
292 return -1;
293 }
294
295 //handle case sensitive flag (or not)
296 QString s1( caseSensitive ? string1 : string1.toLower() );
297 QString s2( caseSensitive ? string2 : string2.toLower() );
298
299 if ( s1 == s2 )
300 {
301 //another trivial case, identical strings
302 return 0;
303 }
304
305 int distance = 0;
306 const QChar *s1Char = s1.constData();
307 const QChar *s2Char = s2.constData();
308
309 for ( int i = 0; i < string1.length(); ++i )
310 {
311 if ( *s1Char != *s2Char )
312 distance++;
313 s1Char++;
314 s2Char++;
315 }
316
317 return distance;
318}
319
320QString QgsStringUtils::soundex( const QString &string )
321{
322 if ( string.isEmpty() )
323 return QString();
324
325 QString tmp = string.toUpper();
326
327 //strip non character codes, and vowel like characters after the first character
328 QChar *char1 = tmp.data();
329 QChar *char2 = tmp.data();
330 int outLen = 0;
331 for ( int i = 0; i < tmp.length(); ++i, ++char2 )
332 {
333 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
334 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
335 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
336 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
337 {
338 *char1 = *char2;
339 char1++;
340 outLen++;
341 }
342 }
343 tmp.truncate( outLen );
344
345 QChar *tmpChar = tmp.data();
346 tmpChar++;
347 for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
348 {
349 switch ( ( *tmpChar ).unicode() )
350 {
351 case 0x42:
352 case 0x46:
353 case 0x50:
354 case 0x56:
355 tmp.replace( i, 1, QChar( 0x31 ) );
356 break;
357
358 case 0x43:
359 case 0x47:
360 case 0x4A:
361 case 0x4B:
362 case 0x51:
363 case 0x53:
364 case 0x58:
365 case 0x5A:
366 tmp.replace( i, 1, QChar( 0x32 ) );
367 break;
368
369 case 0x44:
370 case 0x54:
371 tmp.replace( i, 1, QChar( 0x33 ) );
372 break;
373
374 case 0x4C:
375 tmp.replace( i, 1, QChar( 0x34 ) );
376 break;
377
378 case 0x4D:
379 case 0x4E:
380 tmp.replace( i, 1, QChar( 0x35 ) );
381 break;
382
383 case 0x52:
384 tmp.replace( i, 1, QChar( 0x36 ) );
385 break;
386 }
387 }
388
389 //remove adjacent duplicates
390 char1 = tmp.data();
391 char2 = tmp.data();
392 char2++;
393 outLen = 1;
394 for ( int i = 1; i < tmp.length(); ++i, ++char2 )
395 {
396 if ( *char2 != *char1 )
397 {
398 char1++;
399 *char1 = *char2;
400 outLen++;
401 if ( outLen == 4 )
402 break;
403 }
404 }
405 tmp.truncate( outLen );
406 if ( tmp.length() < 4 )
407 {
408 tmp.append( "000" );
409 tmp.truncate( 4 );
410 }
411
412 return tmp;
413}
414
415
416double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
417{
418 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
419 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
420
421 int candidateLength = candidateNormalized.length();
422 int searchLength = searchNormalized.length();
423 int score = 0;
424
425 // if the candidate and the search term are empty, no other option than 0 score
426 if ( candidateLength == 0 || searchLength == 0 )
427 return score;
428
429 int candidateIdx = 0;
430 int searchIdx = 0;
431 // there is always at least one word
432 int maxScore = FUZZY_SCORE_WORD_MATCH;
433
434 bool isPreviousIndexMatching = false;
435 bool isWordOpen = true;
436
437 // loop trough each candidate char and calculate the potential max score
438 while ( candidateIdx < candidateLength )
439 {
440 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
441 bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
442
443 // the first char is always the default score
444 if ( candidateIdx == 1 )
445 maxScore += FUZZY_SCORE_NEW_MATCH;
446 // every space character or underscore is a opportunity for a new word
447 else if ( isCandidateCharWordEnd )
448 maxScore += FUZZY_SCORE_WORD_MATCH;
449 // potentially we can match every other character
450 else
452
453 // we looped through all the characters
454 if ( searchIdx >= searchLength )
455 continue;
456
457 QChar searchChar = searchNormalized[ searchIdx ];
458 bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
459
460 // match!
461 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
462 {
463 searchIdx++;
464
465 // if we have just successfully finished a word, give higher score
466 if ( isSearchCharWordEnd )
467 {
468 if ( isWordOpen )
469 score += FUZZY_SCORE_WORD_MATCH;
470 else if ( isPreviousIndexMatching )
472 else
473 score += FUZZY_SCORE_NEW_MATCH;
474
475 isWordOpen = true;
476 }
477 // if we have consecutive characters matching, give higher score
478 else if ( isPreviousIndexMatching )
479 {
481 }
482 // normal score for new independent character that matches
483 else
484 {
485 score += FUZZY_SCORE_NEW_MATCH;
486 }
487
488 isPreviousIndexMatching = true;
489 }
490 // if the current character does NOT match, we are sure we cannot build a word for now
491 else
492 {
493 isPreviousIndexMatching = false;
494 isWordOpen = false;
495 }
496
497 // if the search string is covered, check if the last match is end of word
498 if ( searchIdx >= searchLength )
499 {
500 bool isEndOfWord = ( candidateIdx >= candidateLength )
501 ? true
502 : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
503
504 if ( isEndOfWord )
505 score += FUZZY_SCORE_WORD_MATCH;
506 }
507
508 // QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
509 }
510
511 // QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
512 // we didn't loop through all the search chars, it means, that they are not present in the current candidate
513 if ( searchIdx < searchLength )
514 score = 0;
515
516 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
517}
518
519
520QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
521{
522 QString converted = string;
523
524 // http://alanstorm.com/url_regex_explained
525 // note - there's more robust implementations available
526 const thread_local QRegularExpression urlRegEx( QStringLiteral( "((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))" ) );
527 const thread_local QRegularExpression protoRegEx( QStringLiteral( "^(?:f|ht)tps?://|file://" ) );
528 const thread_local QRegularExpression emailRegEx( QStringLiteral( "([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
529
530 int offset = 0;
531 bool found = false;
532 QRegularExpressionMatch match = urlRegEx.match( converted );
533 while ( match.hasMatch() )
534 {
535 found = true;
536 QString url = match.captured( 1 );
537 QString protoUrl = url;
538 if ( !protoRegEx.match( protoUrl ).hasMatch() )
539 {
540 protoUrl.prepend( "http://" );
541 }
542 QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
543 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
544 offset = match.capturedStart( 1 ) + anchor.length();
545 match = urlRegEx.match( converted, offset );
546 }
547
548 offset = 0;
549 match = emailRegEx.match( converted );
550 while ( match.hasMatch() )
551 {
552 found = true;
553 QString email = match.captured( 1 );
554 QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
555 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
556 offset = match.capturedStart( 1 ) + anchor.length();
557 match = emailRegEx.match( converted, offset );
558 }
559
560 if ( foundLinks )
561 *foundLinks = found;
562
563 return converted;
564}
565
566bool QgsStringUtils::isUrl( const QString &string )
567{
568 const thread_local QRegularExpression rxUrl( QStringLiteral( "^(http|https|ftp|file)://\\S+$" ) );
569 return rxUrl.match( string ).hasMatch();
570}
571
572QString QgsStringUtils::htmlToMarkdown( const QString &html )
573{
574 // Any changes in this function must be copied to qgscrashreport.cpp too
575 QString converted = html;
576 converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
577 converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
578 converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
579 converted.replace( QLatin1String( "<pre>" ), QLatin1String( "\n```\n" ) );
580 converted.replace( QLatin1String( "</pre>" ), QLatin1String( "```\n" ) );
581
582 const thread_local QRegularExpression hrefRegEx( QStringLiteral( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
583
584 int offset = 0;
585 QRegularExpressionMatch match = hrefRegEx.match( converted );
586 while ( match.hasMatch() )
587 {
588 QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
589 url.replace( '\'', QString() );
590 QString name = match.captured( 2 );
591 QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
592 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
593 offset = match.capturedStart() + anchor.length();
594 match = hrefRegEx.match( converted, offset );
595 }
596
597 return converted;
598}
599
600QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
601{
602 if ( string.isEmpty() || length == 0 )
603 return string;
604
605 QString newstr;
606 QRegularExpression rx;
607 int delimiterLength = 0;
608
609 if ( !customDelimiter.isEmpty() )
610 {
611 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
612 delimiterLength = customDelimiter.length();
613 }
614 else
615 {
616 // \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
617 rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
618 delimiterLength = 1;
619 }
620
621 const QStringList lines = string.split( '\n' );
622 int strLength, strCurrent, strHit, lastHit;
623
624 for ( int i = 0; i < lines.size(); i++ )
625 {
626 const QString line = lines.at( i );
627 strLength = line.length();
628 if ( strLength <= length )
629 {
630 // shortcut, no wrapping required
631 newstr.append( line );
632 if ( i < lines.size() - 1 )
633 newstr.append( '\n' );
634 continue;
635 }
636 strCurrent = 0;
637 strHit = 0;
638 lastHit = 0;
639
640 while ( strCurrent < strLength )
641 {
642 // positive wrap value = desired maximum line width to wrap
643 // negative wrap value = desired minimum line width before wrap
644 if ( useMaxLineLength )
645 {
646 //first try to locate delimiter backwards
647 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
648 if ( strHit == lastHit || strHit == -1 )
649 {
650 //if no new backward delimiter found, try to locate forward
651 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
652 }
653 lastHit = strHit;
654 }
655 else
656 {
657 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
658 }
659 if ( strHit > -1 )
660 {
661 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
662 newstr.append( '\n' );
663 strCurrent = strHit + delimiterLength;
664 }
665 else
666 {
667 newstr.append( QStringView {line} .mid( strCurrent ) );
668 strCurrent = strLength;
669 }
670 }
671 if ( i < lines.size() - 1 )
672 newstr.append( '\n' );
673 }
674
675 return newstr;
676}
677
679{
680 string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
681 string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
682 string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
683 string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
684 string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
685 string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
686 string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
687 string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
688 string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
689 string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
690 string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
691 string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
692 string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
693 string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
694 string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
695 string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
696 string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
697 return string;
698}
699
700QString QgsStringUtils::qRegExpEscape( const QString &string )
701{
702 // code and logic taken from the Qt source code
703 const QLatin1Char backslash( '\\' );
704 const int count = string.count();
705
706 QString escaped;
707 escaped.reserve( count * 2 );
708 for ( int i = 0; i < count; i++ )
709 {
710 switch ( string.at( i ).toLatin1() )
711 {
712 case '$':
713 case '(':
714 case ')':
715 case '*':
716 case '+':
717 case '.':
718 case '?':
719 case '[':
720 case '\\':
721 case ']':
722 case '^':
723 case '{':
724 case '|':
725 case '}':
726 escaped.append( backslash );
727 }
728 escaped.append( string.at( i ) );
729 }
730 return escaped;
731}
732
733QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
734{
735 const int charactersToTruncate = string.length() - maxLength;
736 if ( charactersToTruncate <= 0 )
737 return string;
738
739 // note we actually truncate an extra character, as we'll be replacing it with the ... character
740 const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
741 if ( truncateFrom <= 0 )
742 return QChar( 0x2026 );
743
744#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
745 return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) + string.midRef( truncateFrom + charactersToTruncate + 1 );
746#else
747 return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
748#endif
749}
750
751bool QgsStringUtils::containsByWord( const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity )
752{
753 if ( candidate.trimmed().isEmpty() )
754 return false;
755
756 const thread_local QRegularExpression rxWhitespace( QStringLiteral( "\\s+" ) );
757 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
758 if ( parts.empty() )
759 return false;
760 for ( const QString &word : parts )
761 {
762 if ( !candidate.contains( word, sensitivity ) )
763 return false;
764 }
765 return true;
766}
767
769 : mMatch( match )
770 , mReplacement( replacement )
771 , mCaseSensitive( caseSensitive )
772 , mWholeWordOnly( wholeWordOnly )
773{
774 if ( mWholeWordOnly )
775 {
776 mRx.setPattern( QStringLiteral( "\\b%1\\b" ).arg( mMatch ) );
777 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
778 }
779}
780
781QString QgsStringReplacement::process( const QString &input ) const
782{
783 QString result = input;
784 if ( !mWholeWordOnly )
785 {
786 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
787 }
788 else
789 {
790 return result.replace( mRx, mReplacement );
791 }
792}
793
795{
796 QgsStringMap map;
797 map.insert( QStringLiteral( "match" ), mMatch );
798 map.insert( QStringLiteral( "replace" ), mReplacement );
799 map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
800 map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? QStringLiteral( "1" ) : QStringLiteral( "0" ) );
801 return map;
802}
803
805{
806 return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
807 properties.value( QStringLiteral( "replace" ) ),
808 properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
809 properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
810}
811
812QString QgsStringReplacementCollection::process( const QString &input ) const
813{
814 QString result = input;
815 for ( const QgsStringReplacement &r : mReplacements )
816 {
817 result = r.process( result );
818 }
819 return result;
820}
821
822void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
823{
824 for ( const QgsStringReplacement &r : mReplacements )
825 {
826 QgsStringMap props = r.properties();
827 QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
828 QgsStringMap::const_iterator it = props.constBegin();
829 for ( ; it != props.constEnd(); ++it )
830 {
831 propEl.setAttribute( it.key(), it.value() );
832 }
833 elem.appendChild( propEl );
834 }
835}
836
837void QgsStringReplacementCollection::readXml( const QDomElement &elem )
838{
839 mReplacements.clear();
840 QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
841 for ( int i = 0; i < nodelist.count(); i++ )
842 {
843 QDomElement replacementElem = nodelist.at( i ).toElement();
844 QDomNamedNodeMap nodeMap = replacementElem.attributes();
845
846 QgsStringMap props;
847 for ( int j = 0; j < nodeMap.count(); ++j )
848 {
849 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
850 }
851 mReplacements << QgsStringReplacement::fromProperties( props );
852 }
853
854}
Capitalization
String capitalization options.
Definition qgis.h:3389
@ AllSmallCaps
Force all characters to small caps.
Definition qgis.h:3397
@ MixedCase
Mixed case, ie no change.
Definition qgis.h:3390
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
Definition qgis.h:3396
@ AllLowercase
Convert all characters to lowercase.
Definition qgis.h:3392
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
Definition qgis.h:3395
@ SmallCaps
Mixed case small caps.
Definition qgis.h:3394
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
Definition qgis.h:3393
@ AllUppercase
Convert all characters to uppercase.
Definition qgis.h:3391
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
QMap< QString, QString > QgsStringMap
Definition qgis.h:7132
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH