22#include <QRegularExpression>
24#include <QTextBoundaryFinder>
29 if (
string.isEmpty() )
32 switch ( capitalization )
39 return string.toUpper();
43 return string.toLower();
47 QString temp = string;
49 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
50 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
52 wordSplitter.setPosition( 0 );
54 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
55 || wordSplitter.toNextBoundary() >= 0 )
58 letterSplitter.setPosition( wordSplitter.position() );
59 ( void )letterSplitter.toNextBoundary();
60 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
61 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
70 static QStringList smallWords;
71 static QStringList newPhraseSeparators;
72 static QRegularExpression splitWords;
73 if ( smallWords.empty() )
75 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
76 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
77 splitWords = QRegularExpression( QStringLiteral(
"\\b" ), QRegularExpression::UseUnicodePropertiesOption );
80 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
81 const QStringList parts = ( allSameCase ?
string.toLower() :
string ).split( splitWords, Qt::SkipEmptyParts );
83 bool firstWord =
true;
85 int lastWord = parts.count() - 1;
86 for (
const QString &word : std::as_const( parts ) )
88 if ( newPhraseSeparators.contains( word.trimmed() ) )
93 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
95 result += word.at( 0 ).toUpper() + word.mid( 1 );
109 result.remove(
' ' );
// Walk the input one UTF-16 code unit at a time and append its HTML/XML-safe
// form to `encoded`.
120 for (
int i = 0; i <
string.size(); ++i )
122 QChar ch =
string.at( i );
// Code units above 160 are written as decimal numeric character references.
123 if ( ch.unicode() > 160 )
124 encoded += QStringLiteral(
"&#%1;" ).arg(
static_cast< int >( ch.unicode() ) );
// '&' (38), '<' (60) and '>' (62) are markup-significant and must be emitted
// as named entities. Appending the raw character here would leave the output
// unescaped.
125 else if ( ch.unicode() == 38 )
126 encoded += QLatin1String(
"&amp;" );
127 else if ( ch.unicode() == 60 )
128 encoded += QLatin1String(
"&lt;" );
129 else if ( ch.unicode() == 62 )
130 encoded += QLatin1String(
"&gt;" );
139 int length1 = string1.length();
140 int length2 = string2.length();
143 if ( string1.isEmpty() )
147 else if ( string2.isEmpty() )
153 QString s1( caseSensitive ? string1 : string1.toLower() );
154 QString s2( caseSensitive ? string2 : string2.toLower() );
156 const QChar *s1Char = s1.constData();
157 const QChar *s2Char = s2.constData();
160 int commonPrefixLen = 0;
161 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
171 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
182 else if ( length2 == 0 )
188 if ( length1 > length2 )
191 std::swap( length1, length2 );
195 std::vector< int > col( length2 + 1, 0 );
196 std::vector< int > prevCol;
197 prevCol.reserve( length2 + 1 );
198 for (
int i = 0; i < length2 + 1; ++i )
200 prevCol.emplace_back( i );
202 const QChar *s2start = s2Char;
203 for (
int i = 0; i < length1; ++i )
207 for (
int j = 0; j < length2; ++j )
209 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
215 return prevCol[length2];
220 if ( string1.isEmpty() || string2.isEmpty() )
227 QString s1( caseSensitive ? string1 : string1.toLower() );
228 QString s2( caseSensitive ? string2 : string2.toLower() );
236 int *currentScores =
new int [ s2.length()];
237 int *previousScores =
new int [ s2.length()];
238 int maxCommonLength = 0;
239 int lastMaxBeginIndex = 0;
241 const QChar *s1Char = s1.constData();
242 const QChar *s2Char = s2.constData();
243 const QChar *s2Start = s2Char;
245 for (
int i = 0; i < s1.length(); ++i )
247 for (
int j = 0; j < s2.length(); ++j )
249 if ( *s1Char != *s2Char )
251 currentScores[j] = 0;
255 if ( i == 0 || j == 0 )
257 currentScores[j] = 1;
261 currentScores[j] = 1 + previousScores[j - 1];
264 if ( maxCommonLength < currentScores[j] )
266 maxCommonLength = currentScores[j];
267 lastMaxBeginIndex = i;
272 std::swap( currentScores, previousScores );
276 delete [] currentScores;
277 delete [] previousScores;
278 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
283 if ( string1.isEmpty() && string2.isEmpty() )
289 if ( string1.length() != string2.length() )
296 QString s1( caseSensitive ? string1 : string1.toLower() );
297 QString s2( caseSensitive ? string2 : string2.toLower() );
306 const QChar *s1Char = s1.constData();
307 const QChar *s2Char = s2.constData();
309 for (
int i = 0; i < string1.length(); ++i )
311 if ( *s1Char != *s2Char )
322 if (
string.isEmpty() )
325 QString tmp =
string.toUpper();
328 QChar *char1 = tmp.data();
329 QChar *char2 = tmp.data();
331 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
333 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
334 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
335 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
336 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
343 tmp.truncate( outLen );
345 QChar *tmpChar = tmp.data();
347 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
349 switch ( ( *tmpChar ).unicode() )
355 tmp.replace( i, 1, QChar( 0x31 ) );
366 tmp.replace( i, 1, QChar( 0x32 ) );
371 tmp.replace( i, 1, QChar( 0x33 ) );
375 tmp.replace( i, 1, QChar( 0x34 ) );
380 tmp.replace( i, 1, QChar( 0x35 ) );
384 tmp.replace( i, 1, QChar( 0x36 ) );
394 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
396 if ( *char2 != *char1 )
405 tmp.truncate( outLen );
406 if ( tmp.length() < 4 )
418 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
419 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
421 int candidateLength = candidateNormalized.length();
422 int searchLength = searchNormalized.length();
426 if ( candidateLength == 0 || searchLength == 0 )
429 int candidateIdx = 0;
434 bool isPreviousIndexMatching =
false;
435 bool isWordOpen =
true;
438 while ( candidateIdx < candidateLength )
440 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
441 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
444 if ( candidateIdx == 1 )
447 else if ( isCandidateCharWordEnd )
454 if ( searchIdx >= searchLength )
457 QChar searchChar = searchNormalized[ searchIdx ];
458 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
461 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
466 if ( isSearchCharWordEnd )
470 else if ( isPreviousIndexMatching )
478 else if ( isPreviousIndexMatching )
488 isPreviousIndexMatching =
true;
493 isPreviousIndexMatching =
false;
498 if ( searchIdx >= searchLength )
500 bool isEndOfWord = ( candidateIdx >= candidateLength )
502 : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
513 if ( searchIdx < searchLength )
516 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
522 QString converted = string;
526 const thread_local QRegularExpression urlRegEx( QStringLiteral(
"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))" ) );
527 const thread_local QRegularExpression protoRegEx( QStringLiteral(
"^(?:f|ht)tps?://|file://" ) );
528 const thread_local QRegularExpression emailRegEx( QStringLiteral(
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
532 QRegularExpressionMatch match = urlRegEx.match( converted );
533 while ( match.hasMatch() )
536 QString url = match.captured( 1 );
537 QString protoUrl = url;
538 if ( !protoRegEx.match( protoUrl ).hasMatch() )
540 protoUrl.prepend(
"http://" );
542 QString anchor = QStringLiteral(
"<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
543 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
544 offset = match.capturedStart( 1 ) + anchor.length();
545 match = urlRegEx.match( converted, offset );
549 match = emailRegEx.match( converted );
550 while ( match.hasMatch() )
553 QString email = match.captured( 1 );
554 QString anchor = QStringLiteral(
"<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
555 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
556 offset = match.capturedStart( 1 ) + anchor.length();
557 match = emailRegEx.match( converted, offset );
568 const thread_local QRegularExpression rxUrl( QStringLiteral(
"^(http|https|ftp|file)://\\S+$" ) );
569 return rxUrl.match(
string ).hasMatch();
575 QString converted = html;
576 converted.replace( QLatin1String(
"<br>" ), QLatin1String(
"\n" ) );
577 converted.replace( QLatin1String(
"<b>" ), QLatin1String(
"**" ) );
578 converted.replace( QLatin1String(
"</b>" ), QLatin1String(
"**" ) );
579 converted.replace( QLatin1String(
"<pre>" ), QLatin1String(
"\n```\n" ) );
580 converted.replace( QLatin1String(
"</pre>" ), QLatin1String(
"```\n" ) );
582 const thread_local QRegularExpression hrefRegEx( QStringLiteral(
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
585 QRegularExpressionMatch match = hrefRegEx.match( converted );
586 while ( match.hasMatch() )
588 QString url = match.captured( 1 ).replace( QLatin1String(
"\"" ), QString() );
589 url.replace(
'\'', QString() );
590 QString name = match.captured( 2 );
591 QString anchor = QStringLiteral(
"[%1](%2)" ).arg( name, url );
592 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
593 offset = match.capturedStart() + anchor.length();
594 match = hrefRegEx.match( converted, offset );
// Wraps `string` by inserting '\n' characters at delimiter matches.
//
// The input is split on '\n' and each resulting line is handled on its own:
// a line no longer than `length` is appended unchanged, a longer line is cut
// at delimiter positions located with the regular expression `rx`.
//
// string            text to wrap
// length            wrap length, compared against each line's length; the
//                   forward searches use std::abs( length )
//                   NOTE(review): negative lengths appear to be tolerated - confirm
// useMaxLineLength  when true, a delimiter is first searched backwards from
//                   strCurrent + length, so `length` acts as a maximum
// customDelimiter   when non-empty, used (regex-escaped) as the break
//                   delimiter in place of the whitespace pattern
//
// NOTE(review): braces, `else` keywords, the declaration of `newstr` and the
// final return are not visible in this listing; comments below describe only
// the statements that are shown.
600QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
// Guard for empty input or a zero wrap length (guarded statement not shown).
602 if (
string.isEmpty() || length == 0 )
606 QRegularExpression rx;
607 int delimiterLength = 0;
// A custom delimiter is matched literally, hence the escape(); its length is
// the amount skipped after each break.
609 if ( !customDelimiter.isEmpty() )
611 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
612 delimiterLength = customDelimiter.length();
// Pattern matching zero-width space (U+200B) or any whitespace character
// (presumably the branch taken when no custom delimiter is given - confirm).
617 rx.setPattern( QStringLiteral(
"[\\x{200B}\\s]" ) );
// Existing line breaks are preserved: each input line is wrapped separately.
621 const QStringList lines =
string.split(
'\n' );
622 int strLength, strCurrent, strHit, lastHit;
624 for (
int i = 0; i < lines.size(); i++ )
626 const QString line = lines.at( i );
627 strLength = line.length();
// Line already fits: copy it through, re-adding the '\n' removed by split()
// for every line except the last.
628 if ( strLength <= length )
631 newstr.append( line );
632 if ( i < lines.size() - 1 )
633 newstr.append(
'\n' );
// Consume the line piece by piece until strCurrent reaches its end.
640 while ( strCurrent < strLength )
644 if ( useMaxLineLength )
// Search backwards from strCurrent + length; -1 when the remainder already
// fits within `length`.
647 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
// No usable hit (none found, or the same position as lastHit): search
// forwards from strCurrent + |length| instead.
648 if ( strHit == lastHit || strHit == -1 )
651 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
// Forward search from strCurrent + |length| (presumably the branch for
// useMaxLineLength == false - confirm).
657 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
// Emit the text up to the hit followed by a line break, then continue after
// the delimiter.
661 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
662 newstr.append(
'\n' );
663 strCurrent = strHit + delimiterLength;
// Emit the rest of the line and mark it as fully consumed.
667 newstr.append( QStringView {line} .mid( strCurrent ) );
668 strCurrent = strLength;
// Re-add the original line break between input lines (not after the last).
671 if ( i < lines.size() - 1 )
672 newstr.append(
'\n' );
680 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
681 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
682 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
683 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
684 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
685 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
686 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
687 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
688 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
689 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
690 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
691 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
692 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
693 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
694 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
695 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
696 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
703 const QLatin1Char backslash(
'\\' );
704 const int count =
string.count();
707 escaped.reserve( count * 2 );
708 for (
int i = 0; i < count; i++ )
710 switch (
string.at( i ).toLatin1() )
726 escaped.append( backslash );
728 escaped.append(
string.at( i ) );
735 const int charactersToTruncate =
string.length() - maxLength;
736 if ( charactersToTruncate <= 0 )
740 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
741 if ( truncateFrom <= 0 )
742 return QChar( 0x2026 );
744#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
745 return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) +
string.midRef( truncateFrom + charactersToTruncate + 1 );
747 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
753 if ( candidate.trimmed().isEmpty() )
756 const thread_local QRegularExpression rxWhitespace( QStringLiteral(
"\\s+" ) );
757 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
760 for (
const QString &word : parts )
762 if ( !candidate.contains( word, sensitivity ) )
774 if ( mWholeWordOnly )
776 mRx.setPattern( QStringLiteral(
"\\b%1\\b" ).arg( mMatch ) );
777 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
783 QString result = input;
784 if ( !mWholeWordOnly )
786 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
790 return result.replace( mRx, mReplacement );
797 map.insert( QStringLiteral(
"match" ), mMatch );
798 map.insert( QStringLiteral(
"replace" ), mReplacement );
799 map.insert( QStringLiteral(
"caseSensitive" ), mCaseSensitive ? QStringLiteral(
"1" ) : QStringLiteral(
"0" ) );
800 map.insert( QStringLiteral(
"wholeWord" ), mWholeWordOnly ? QStringLiteral(
"1" ) : QStringLiteral(
"0" ) );
807 properties.value( QStringLiteral(
"replace" ) ),
808 properties.value( QStringLiteral(
"caseSensitive" ), QStringLiteral(
"0" ) ) == QLatin1String(
"1" ),
809 properties.value( QStringLiteral(
"wholeWord" ), QStringLiteral(
"0" ) ) == QLatin1String(
"1" ) );
814 QString result = input;
817 result = r.process( result );
827 QDomElement propEl = doc.createElement( QStringLiteral(
"replacement" ) );
828 QgsStringMap::const_iterator it = props.constBegin();
829 for ( ; it != props.constEnd(); ++it )
831 propEl.setAttribute( it.key(), it.value() );
833 elem.appendChild( propEl );
839 mReplacements.clear();
840 QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral(
"replacement" ) );
841 for (
int i = 0; i < nodelist.count(); i++ )
843 QDomElement replacementElem = nodelist.at( i ).toElement();
844 QDomNamedNodeMap nodeMap = replacementElem.attributes();
847 for (
int j = 0; j < nodeMap.count(); ++j )
849 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only.
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using the QgsStringReplacement objects in this collection.
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from another string.
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ...> links.
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH