QGIS API Documentation 4.1.0-Master (60fea48833c)
Loading...
Searching...
No Matches
qgsstringutils.cpp
Go to the documentation of this file.
1/***************************************************************************
2 qgsstringutils.cpp
3 ------------------
4 begin : June 2015
5 copyright : (C) 2015 by Nyall Dawson
6 email : nyall dot dawson at gmail dot com
7 ***************************************************************************
8 * *
9 * This program is free software; you can redistribute it and/or modify *
10 * it under the terms of the GNU General Public License as published by *
11 * the Free Software Foundation; either version 2 of the License, or *
12 * (at your option) any later version. *
13 * *
14 ***************************************************************************/
15
16#include "qgsstringutils.h"
17
18#include <cstdlib>
19
20#include "qgslogger.h"
21
22#include <QRegularExpression>
23#include <QString>
24#include <QStringList>
25#include <QTextBoundaryFinder>
26#include <QVector>
27
28using namespace Qt::StringLiterals;
29
31
32QString QgsStringUtils::unaccent( const QString &input )
33{
34 // Normalize input to NFC so that Unicode characters composed of base +
35 // combining marks are converted to their canonical composed form.
36 // This ensures lookups match the keys in UNACCENT_MAP, which are stored
37 // in NFC (e.g. "e" + U+0301 becomes "é", as PostgreSQL does it.).
38 const QString in = input.normalized( QString::NormalizationForm_C );
39 QString out;
40 out.reserve( in.size() );
41
42 qsizetype i = 0;
43 const qsizetype n = in.size();
44
45 while ( i < n )
46 {
47 const QChar c = in.at( i );
48 int len = 1;
49
50 // Detect surrogate pair (non-BMP)
51 if ( c.isHighSurrogate() && i + 1 < n )
52 {
53 const QChar c2 = in.at( i + 1 );
54 if ( c2.isLowSurrogate() )
55 len = 2;
56 }
57
58 const QString key = in.mid( i, len ).normalized( QString::NormalizationForm_C );
59
60 auto it = UNACCENT_MAP.constFind( key );
61 if ( it != UNACCENT_MAP.constEnd() )
62 out.append( it.value() );
63 else
64 out.append( key );
65
66 i += len;
67 }
68
69 return out;
70}
71
72QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
73{
74 if ( string.isEmpty() )
75 return QString();
76
77 switch ( capitalization )
78 {
81 return string;
82
84 return string.toUpper();
85
88 return string.toLower();
89
91 {
92 QString temp = string;
93
94 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
95 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
96
97 wordSplitter.setPosition( 0 );
98 bool first = true;
99 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem ) || wordSplitter.toNextBoundary() >= 0 )
100 {
101 first = false;
102 letterSplitter.setPosition( wordSplitter.position() );
103 ( void ) letterSplitter.toNextBoundary();
104 QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
105 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
106 }
107 return temp;
108 }
109
111 {
112 // yes, this is MASSIVELY simplifying the problem!!
113
114 static QStringList smallWords;
115 static QStringList newPhraseSeparators;
116 static QRegularExpression splitWords;
117 if ( smallWords.empty() )
118 {
119 smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
120 newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
121 splitWords = QRegularExpression( u"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );
122 }
123
124 const bool allSameCase = string.toLower() == string || string.toUpper() == string;
125 const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
126 QString result;
127 bool firstWord = true;
128 int i = 0;
129 int lastWord = parts.count() - 1;
130 for ( const QString &word : std::as_const( parts ) )
131 {
132 if ( newPhraseSeparators.contains( word.trimmed() ) )
133 {
134 firstWord = true;
135 result += word;
136 }
137 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
138 {
139 result += word.at( 0 ).toUpper() + word.mid( 1 );
140 firstWord = false;
141 }
142 else
143 {
144 result += word;
145 }
146 i++;
147 }
148 return result;
149 }
150
152 QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
153 result.remove( ' ' );
154 return result;
155 }
156 // no warnings
157 return string;
158}
159
160// original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
161QString QgsStringUtils::ampersandEncode( const QString &string )
162{
163 QString encoded;
164 for ( int i = 0; i < string.size(); ++i )
165 {
166 QChar ch = string.at( i );
167 if ( ch.unicode() > 160 )
168 encoded += u"&#%1;"_s.arg( static_cast< int >( ch.unicode() ) );
169 else if ( ch.unicode() == 38 )
170 encoded += "&amp;"_L1;
171 else if ( ch.unicode() == 60 )
172 encoded += "&lt;"_L1;
173 else if ( ch.unicode() == 62 )
174 encoded += "&gt;"_L1;
175 else
176 encoded += ch;
177 }
178 return encoded;
179}
180
181int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
182{
183 int length1 = string1.length();
184 int length2 = string2.length();
185
186 //empty strings? solution is trivial...
187 if ( string1.isEmpty() )
188 {
189 return length2;
190 }
191 else if ( string2.isEmpty() )
192 {
193 return length1;
194 }
195
196 //handle case sensitive flag (or not)
197 QString s1( caseSensitive ? string1 : string1.toLower() );
198 QString s2( caseSensitive ? string2 : string2.toLower() );
199
200 const QChar *s1Char = s1.constData();
201 const QChar *s2Char = s2.constData();
202
203 //strip out any common prefix
204 int commonPrefixLen = 0;
205 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
206 {
207 commonPrefixLen++;
208 length1--;
209 length2--;
210 s1Char++;
211 s2Char++;
212 }
213
214 //strip out any common suffix
215 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
216 {
217 length1--;
218 length2--;
219 }
220
221 //fully checked either string? if so, the answer is easy...
222 if ( length1 == 0 )
223 {
224 return length2;
225 }
226 else if ( length2 == 0 )
227 {
228 return length1;
229 }
230
231 //ensure the inner loop is longer
232 if ( length1 > length2 )
233 {
234 std::swap( s1, s2 );
235 std::swap( length1, length2 );
236 }
237
238 //levenshtein algorithm begins here
239 std::vector< int > col( length2 + 1, 0 );
240 std::vector< int > prevCol;
241 prevCol.reserve( length2 + 1 );
242 for ( int i = 0; i < length2 + 1; ++i )
243 {
244 prevCol.emplace_back( i );
245 }
246 const QChar *s2start = s2Char;
247 for ( int i = 0; i < length1; ++i )
248 {
249 col[0] = i + 1;
250 s2Char = s2start;
251 for ( int j = 0; j < length2; ++j )
252 {
253 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
254 s2Char++;
255 }
256 col.swap( prevCol );
257 s1Char++;
258 }
259 return prevCol[length2];
260}
261
262QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
263{
264 if ( string1.isEmpty() || string2.isEmpty() )
265 {
266 //empty strings, solution is trivial...
267 return QString();
268 }
269
270 //handle case sensitive flag (or not)
271 QString s1( caseSensitive ? string1 : string1.toLower() );
272 QString s2( caseSensitive ? string2 : string2.toLower() );
273
274 if ( s1 == s2 )
275 {
276 //another trivial case, identical strings
277 return s1;
278 }
279
280 int *currentScores = new int[s2.length()];
281 int *previousScores = new int[s2.length()];
282 int maxCommonLength = 0;
283 int lastMaxBeginIndex = 0;
284
285 const QChar *s1Char = s1.constData();
286 const QChar *s2Char = s2.constData();
287 const QChar *s2Start = s2Char;
288
289 for ( int i = 0; i < s1.length(); ++i )
290 {
291 for ( int j = 0; j < s2.length(); ++j )
292 {
293 if ( *s1Char != *s2Char )
294 {
295 currentScores[j] = 0;
296 }
297 else
298 {
299 if ( i == 0 || j == 0 )
300 {
301 currentScores[j] = 1;
302 }
303 else
304 {
305 currentScores[j] = 1 + previousScores[j - 1];
306 }
307
308 if ( maxCommonLength < currentScores[j] )
309 {
310 maxCommonLength = currentScores[j];
311 lastMaxBeginIndex = i;
312 }
313 }
314 s2Char++;
315 }
316 std::swap( currentScores, previousScores );
317 s1Char++;
318 s2Char = s2Start;
319 }
320 delete[] currentScores;
321 delete[] previousScores;
322 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
323}
324
325int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
326{
327 if ( string1.isEmpty() && string2.isEmpty() )
328 {
329 //empty strings, solution is trivial...
330 return 0;
331 }
332
333 if ( string1.length() != string2.length() )
334 {
335 //invalid inputs
336 return -1;
337 }
338
339 //handle case sensitive flag (or not)
340 QString s1( caseSensitive ? string1 : string1.toLower() );
341 QString s2( caseSensitive ? string2 : string2.toLower() );
342
343 if ( s1 == s2 )
344 {
345 //another trivial case, identical strings
346 return 0;
347 }
348
349 int distance = 0;
350 const QChar *s1Char = s1.constData();
351 const QChar *s2Char = s2.constData();
352
353 for ( int i = 0; i < string1.length(); ++i )
354 {
355 if ( *s1Char != *s2Char )
356 distance++;
357 s1Char++;
358 s2Char++;
359 }
360
361 return distance;
362}
363
364QString QgsStringUtils::soundex( const QString &string )
365{
366 if ( string.isEmpty() )
367 return QString();
368
369 QString tmp = string.toUpper();
370
371 //strip non character codes, and vowel like characters after the first character
372 QChar *char1 = tmp.data();
373 QChar *char2 = tmp.data();
374 int outLen = 0;
375 for ( int i = 0; i < tmp.length(); ++i, ++char2 )
376 {
377 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
378 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
379 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
380 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
381 {
382 *char1 = *char2;
383 char1++;
384 outLen++;
385 }
386 }
387 tmp.truncate( outLen );
388
389 QChar *tmpChar = tmp.data();
390 tmpChar++;
391 for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
392 {
393 switch ( ( *tmpChar ).unicode() )
394 {
395 case 0x42:
396 case 0x46:
397 case 0x50:
398 case 0x56:
399 tmp.replace( i, 1, QChar( 0x31 ) );
400 break;
401
402 case 0x43:
403 case 0x47:
404 case 0x4A:
405 case 0x4B:
406 case 0x51:
407 case 0x53:
408 case 0x58:
409 case 0x5A:
410 tmp.replace( i, 1, QChar( 0x32 ) );
411 break;
412
413 case 0x44:
414 case 0x54:
415 tmp.replace( i, 1, QChar( 0x33 ) );
416 break;
417
418 case 0x4C:
419 tmp.replace( i, 1, QChar( 0x34 ) );
420 break;
421
422 case 0x4D:
423 case 0x4E:
424 tmp.replace( i, 1, QChar( 0x35 ) );
425 break;
426
427 case 0x52:
428 tmp.replace( i, 1, QChar( 0x36 ) );
429 break;
430 }
431 }
432
433 //remove adjacent duplicates
434 char1 = tmp.data();
435 char2 = tmp.data();
436 char2++;
437 outLen = 1;
438 for ( int i = 1; i < tmp.length(); ++i, ++char2 )
439 {
440 if ( *char2 != *char1 )
441 {
442 char1++;
443 *char1 = *char2;
444 outLen++;
445 if ( outLen == 4 )
446 break;
447 }
448 }
449 tmp.truncate( outLen );
450 if ( tmp.length() < 4 )
451 {
452 tmp.append( "000" );
453 tmp.truncate( 4 );
454 }
455
456 return tmp;
457}
458
459
460double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
461{
462 QString candidateNormalized = candidate.simplified().normalized( QString::NormalizationForm_C ).toLower();
463 QString searchNormalized = search.simplified().normalized( QString::NormalizationForm_C ).toLower();
464
465 int candidateLength = candidateNormalized.length();
466 int searchLength = searchNormalized.length();
467 int score = 0;
468
469 // if the candidate and the search term are empty, no other option than 0 score
470 if ( candidateLength == 0 || searchLength == 0 )
471 return score;
472
473 int candidateIdx = 0;
474 int searchIdx = 0;
475 // there is always at least one word
476 int maxScore = FUZZY_SCORE_WORD_MATCH;
477
478 bool isPreviousIndexMatching = false;
479 bool isWordOpen = true;
480
481 // loop through each candidate char and calculate the potential max score
482 while ( candidateIdx < candidateLength )
483 {
484 QChar candidateChar = candidateNormalized[candidateIdx++];
485 bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
486
487 // the first char is always the default score
488 if ( candidateIdx == 1 )
489 maxScore += FUZZY_SCORE_NEW_MATCH;
490 // every space or punctuation character is an opportunity for a new word
491 else if ( isCandidateCharWordEnd )
492 maxScore += FUZZY_SCORE_WORD_MATCH;
493 // potentially we can match every other character
494 else
496
497 // we looped through all the characters
498 if ( searchIdx >= searchLength )
499 continue;
500
501 QChar searchChar = searchNormalized[searchIdx];
502 bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
503
504 // match!
505 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
506 {
507 searchIdx++;
508
509 // if we have just successfully finished a word, give higher score
510 if ( isSearchCharWordEnd )
511 {
512 if ( isWordOpen )
513 score += FUZZY_SCORE_WORD_MATCH;
514 else if ( isPreviousIndexMatching )
516 else
517 score += FUZZY_SCORE_NEW_MATCH;
518
519 isWordOpen = true;
520 }
521 // if we have consecutive characters matching, give higher score
522 else if ( isPreviousIndexMatching )
523 {
525 }
526 // normal score for new independent character that matches
527 else
528 {
529 score += FUZZY_SCORE_NEW_MATCH;
530 }
531
532 isPreviousIndexMatching = true;
533 }
534 // if the current character does NOT match, we are sure we cannot build a word for now
535 else
536 {
537 isPreviousIndexMatching = false;
538 isWordOpen = false;
539 }
540
541 // if the search string is covered, check if the last match is end of word
542 if ( searchIdx >= searchLength )
543 {
544 bool isEndOfWord = ( candidateIdx >= candidateLength ) ? true : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
545
546 if ( isEndOfWord )
547 score += FUZZY_SCORE_WORD_MATCH;
548 }
549
550 // QgsLogger::debug( u"TMP: %1 | %2 | %3 | %4 | %5"_s.arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
551 }
552
553 // QgsLogger::debug( u"RES: %1 | %2"_s.arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
554 // we didn't loop through all the search chars, it means, that they are not present in the current candidate
555 if ( searchIdx < searchLength )
556 score = 0;
557
558 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
559}
560
561
562QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
563{
564 QString converted = string;
565
566 // http://alanstorm.com/url_regex_explained
567 // note - there's more robust implementations available
568 const thread_local QRegularExpression urlRegEx(
569 u"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s
570 );
571 const thread_local QRegularExpression protoRegEx( u"^(?:f|ht)tps?://|file://"_s );
572 const thread_local QRegularExpression emailRegEx( u"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );
573
574 int offset = 0;
575 bool found = false;
576 QRegularExpressionMatch match = urlRegEx.match( converted );
577 while ( match.hasMatch() )
578 {
579 found = true;
580 QString url = match.captured( 1 );
581 QString protoUrl = url;
582 if ( !protoRegEx.match( protoUrl ).hasMatch() )
583 {
584 protoUrl.prepend( "http://" );
585 }
586 QString anchor = u"<a href=\"%1\">%2</a>"_s.arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
587 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
588 offset = match.capturedStart( 1 ) + anchor.length();
589 match = urlRegEx.match( converted, offset );
590 }
591
592 offset = 0;
593 match = emailRegEx.match( converted );
594 while ( match.hasMatch() )
595 {
596 found = true;
597 QString email = match.captured( 1 );
598 QString anchor = u"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
599 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
600 offset = match.capturedStart( 1 ) + anchor.length();
601 match = emailRegEx.match( converted, offset );
602 }
603
604 if ( foundLinks )
605 *foundLinks = found;
606
607 return converted;
608}
609
610bool QgsStringUtils::isUrl( const QString &string )
611{
612 const thread_local QRegularExpression rxUrl( u"^(http|https|ftp|file)://\\S+$"_s );
613 return rxUrl.match( string ).hasMatch();
614}
615
616QString QgsStringUtils::htmlToMarkdown( const QString &html )
617{
618 // Any changes in this function must be copied to qgscrashreport.cpp too
619 QString converted = html;
620 converted.replace( "<br>"_L1, "\n"_L1 );
621 converted.replace( "<b>"_L1, "**"_L1 );
622 converted.replace( "</b>"_L1, "**"_L1 );
623 converted.replace( "<pre>"_L1, "\n```\n"_L1 );
624 converted.replace( "</pre>"_L1, "```\n"_L1 );
625
626 const thread_local QRegularExpression hrefRegEx( u"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );
627
628 int offset = 0;
629 QRegularExpressionMatch match = hrefRegEx.match( converted );
630 while ( match.hasMatch() )
631 {
632 QString url = match.captured( 1 ).replace( "\""_L1, QString() );
633 url.replace( '\'', QString() );
634 QString name = match.captured( 2 );
635 QString anchor = u"[%1](%2)"_s.arg( name, url );
636 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
637 offset = match.capturedStart() + anchor.length();
638 match = hrefRegEx.match( converted, offset );
639 }
640
641 return converted;
642}
643
644QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
645{
646 if ( string.isEmpty() || length == 0 )
647 return string;
648
649 QString newstr;
650 QRegularExpression rx;
651 int delimiterLength = 0;
652
653 if ( !customDelimiter.isEmpty() )
654 {
655 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
656 delimiterLength = customDelimiter.length();
657 }
658 else
659 {
660 // \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
661 rx.setPattern( u"[\\x{200B}\\s]"_s );
662 delimiterLength = 1;
663 }
664
665 const QStringList lines = string.split( '\n' );
666 int strLength, strCurrent, strHit, lastHit;
667
668 for ( int i = 0; i < lines.size(); i++ )
669 {
670 const QString line = lines.at( i );
671 strLength = line.length();
672 if ( strLength <= length )
673 {
674 // shortcut, no wrapping required
675 newstr.append( line );
676 if ( i < lines.size() - 1 )
677 newstr.append( '\n' );
678 continue;
679 }
680 strCurrent = 0;
681 strHit = 0;
682 lastHit = 0;
683
684 while ( strCurrent < strLength )
685 {
686 // positive wrap value = desired maximum line width to wrap
687 // negative wrap value = desired minimum line width before wrap
688 if ( useMaxLineLength )
689 {
690 //first try to locate delimiter backwards
691 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
692 if ( strHit == lastHit || strHit == -1 )
693 {
694 //if no new backward delimiter found, try to locate forward
695 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
696 }
697 lastHit = strHit;
698 }
699 else
700 {
701 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
702 }
703 if ( strHit > -1 )
704 {
705 newstr.append( QStringView { line }.mid( strCurrent, strHit - strCurrent ) );
706 newstr.append( '\n' );
707 strCurrent = strHit + delimiterLength;
708 }
709 else
710 {
711 newstr.append( QStringView { line }.mid( strCurrent ) );
712 strCurrent = strLength;
713 }
714 }
715 if ( i < lines.size() - 1 )
716 newstr.append( '\n' );
717 }
718
719 return newstr;
720}
721
723{
724 string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
725 string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
726 string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
727 string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
728 string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
729 string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
730 string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
731 string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
732 string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
733 string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
734 string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
735 string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
736 string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
737 string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
738 string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
739 string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
740 string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
741 return string;
742}
743
744QString QgsStringUtils::qRegExpEscape( const QString &string )
745{
746 // code and logic taken from the Qt source code
747 const QLatin1Char backslash( '\\' );
748 const int count = string.count();
749
750 QString escaped;
751 escaped.reserve( count * 2 );
752 for ( int i = 0; i < count; i++ )
753 {
754 switch ( string.at( i ).toLatin1() )
755 {
756 case '$':
757 case '(':
758 case ')':
759 case '*':
760 case '+':
761 case '.':
762 case '?':
763 case '[':
764 case '\\':
765 case ']':
766 case '^':
767 case '{':
768 case '|':
769 case '}':
770 escaped.append( backslash );
771 }
772 escaped.append( string.at( i ) );
773 }
774 return escaped;
775}
776
777QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
778{
779 const int charactersToTruncate = string.length() - maxLength;
780 if ( charactersToTruncate <= 0 )
781 return string;
782
783 // note we actually truncate an extra character, as we'll be replacing it with the ... character
784 const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
785 if ( truncateFrom <= 0 )
786 return QChar( 0x2026 );
787
788 return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
789}
790
791bool QgsStringUtils::containsByWord( const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity )
792{
793 if ( candidate.trimmed().isEmpty() )
794 return false;
795
796 const thread_local QRegularExpression rxWhitespace( u"\\s+"_s );
797 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
798 if ( parts.empty() )
799 return false;
800 for ( const QString &word : parts )
801 {
802 if ( !candidate.contains( word, sensitivity ) )
803 return false;
804 }
805 return true;
806}
807
809 : mMatch( match )
810 , mReplacement( replacement )
811 , mCaseSensitive( caseSensitive )
812 , mWholeWordOnly( wholeWordOnly )
813{
814 if ( mWholeWordOnly )
815 {
816 mRx.setPattern( u"\\b%1\\b"_s.arg( mMatch ) );
817 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
818 }
819}
820
821QString QgsStringReplacement::process( const QString &input ) const
822{
823 QString result = input;
824 if ( !mWholeWordOnly )
825 {
826 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
827 }
828 else
829 {
830 return result.replace( mRx, mReplacement );
831 }
832}
833
835{
836 QgsStringMap map;
837 map.insert( u"match"_s, mMatch );
838 map.insert( u"replace"_s, mReplacement );
839 map.insert( u"caseSensitive"_s, mCaseSensitive ? u"1"_s : u"0"_s );
840 map.insert( u"wholeWord"_s, mWholeWordOnly ? u"1"_s : u"0"_s );
841 return map;
842}
843
845{
846 return QgsStringReplacement( properties.value( u"match"_s ), properties.value( u"replace"_s ), properties.value( u"caseSensitive"_s, u"0"_s ) == "1"_L1, properties.value( u"wholeWord"_s, u"0"_s ) == "1"_L1 );
847}
848
849QString QgsStringReplacementCollection::process( const QString &input ) const
850{
851 QString result = input;
852 for ( const QgsStringReplacement &r : mReplacements )
853 {
854 result = r.process( result );
855 }
856 return result;
857}
858
859void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
860{
861 for ( const QgsStringReplacement &r : mReplacements )
862 {
863 QgsStringMap props = r.properties();
864 QDomElement propEl = doc.createElement( u"replacement"_s );
865 QgsStringMap::const_iterator it = props.constBegin();
866 for ( ; it != props.constEnd(); ++it )
867 {
868 propEl.setAttribute( it.key(), it.value() );
869 }
870 elem.appendChild( propEl );
871 }
872}
873
874void QgsStringReplacementCollection::readXml( const QDomElement &elem )
875{
876 mReplacements.clear();
877 QDomNodeList nodelist = elem.elementsByTagName( u"replacement"_s );
878 for ( int i = 0; i < nodelist.count(); i++ )
879 {
880 QDomElement replacementElem = nodelist.at( i ).toElement();
881 QDomNamedNodeMap nodeMap = replacementElem.attributes();
882
883 QgsStringMap props;
884 for ( int j = 0; j < nodeMap.count(); ++j )
885 {
886 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
887 }
888 mReplacements << QgsStringReplacement::fromProperties( props );
889 }
890}
Capitalization
String capitalization options.
Definition qgis.h:3503
@ AllSmallCaps
Force all characters to small caps.
Definition qgis.h:3511
@ MixedCase
Mixed case, ie no change.
Definition qgis.h:3504
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
Definition qgis.h:3510
@ AllLowercase
Convert all characters to lowercase.
Definition qgis.h:3506
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
Definition qgis.h:3509
@ SmallCaps
Mixed case small caps.
Definition qgis.h:3508
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
Definition qgis.h:3507
@ AllUppercase
Convert all characters to uppercase.
Definition qgis.h:3505
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their u...
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
As part of the API refactoring and improvements which landed in the Processing API was substantially reworked from the x version This was done in order to allow much of the underlying Processing framework to be ported into c
QMap< QString, QString > QgsStringMap
Definition qgis.h:7475
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH