22#include <QRegularExpression>
25#include <QTextBoundaryFinder>
28using namespace Qt::StringLiterals;
38 const QString in = input.normalized( QString::NormalizationForm_C );
40 out.reserve( in.size() );
43 const qsizetype n = in.size();
47 const QChar
c = in.at( i );
51 if (
c.isHighSurrogate() && i + 1 < n )
53 const QChar c2 = in.at( i + 1 );
54 if ( c2.isLowSurrogate() )
58 const QString key = in.mid( i, len ).normalized( QString::NormalizationForm_C );
62 out.append( it.value() );
74 if (
string.isEmpty() )
77 switch ( capitalization )
84 return string.toUpper();
88 return string.toLower();
92 QString temp = string;
94 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
95 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
97 wordSplitter.setPosition( 0 );
99 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem ) || wordSplitter.toNextBoundary() >= 0 )
102 letterSplitter.setPosition( wordSplitter.position() );
103 ( void ) letterSplitter.toNextBoundary();
104 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
105 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
114 static QStringList smallWords;
115 static QStringList newPhraseSeparators;
116 static QRegularExpression splitWords;
117 if ( smallWords.empty() )
119 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
120 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
121 splitWords = QRegularExpression( u
"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );
124 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
125 const QStringList parts = ( allSameCase ?
string.toLower() :
string ).split( splitWords, Qt::SkipEmptyParts );
127 bool firstWord =
true;
129 int lastWord = parts.count() - 1;
130 for (
const QString &word : std::as_const( parts ) )
132 if ( newPhraseSeparators.contains( word.trimmed() ) )
137 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
139 result += word.at( 0 ).toUpper() + word.mid( 1 );
153 result.remove(
' ' );
164 for (
int i = 0; i <
string.size(); ++i )
166 QChar ch =
string.at( i );
167 if ( ch.unicode() > 160 )
168 encoded += u
"&#%1;"_s.arg(
static_cast< int >( ch.unicode() ) );
169 else if ( ch.unicode() == 38 )
170 encoded +=
"&"_L1;
171 else if ( ch.unicode() == 60 )
172 encoded +=
"<"_L1;
173 else if ( ch.unicode() == 62 )
174 encoded +=
">"_L1;
183 int length1 = string1.length();
184 int length2 = string2.length();
187 if ( string1.isEmpty() )
191 else if ( string2.isEmpty() )
197 QString s1( caseSensitive ? string1 : string1.toLower() );
198 QString s2( caseSensitive ? string2 : string2.toLower() );
200 const QChar *s1Char = s1.constData();
201 const QChar *s2Char = s2.constData();
204 int commonPrefixLen = 0;
205 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
215 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
226 else if ( length2 == 0 )
232 if ( length1 > length2 )
235 std::swap( length1, length2 );
239 std::vector< int > col( length2 + 1, 0 );
240 std::vector< int > prevCol;
241 prevCol.reserve( length2 + 1 );
242 for (
int i = 0; i < length2 + 1; ++i )
244 prevCol.emplace_back( i );
246 const QChar *s2start = s2Char;
247 for (
int i = 0; i < length1; ++i )
251 for (
int j = 0; j < length2; ++j )
253 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
259 return prevCol[length2];
264 if ( string1.isEmpty() || string2.isEmpty() )
271 QString s1( caseSensitive ? string1 : string1.toLower() );
272 QString s2( caseSensitive ? string2 : string2.toLower() );
280 int *currentScores =
new int[s2.length()];
281 int *previousScores =
new int[s2.length()];
282 int maxCommonLength = 0;
283 int lastMaxBeginIndex = 0;
285 const QChar *s1Char = s1.constData();
286 const QChar *s2Char = s2.constData();
287 const QChar *s2Start = s2Char;
289 for (
int i = 0; i < s1.length(); ++i )
291 for (
int j = 0; j < s2.length(); ++j )
293 if ( *s1Char != *s2Char )
295 currentScores[j] = 0;
299 if ( i == 0 || j == 0 )
301 currentScores[j] = 1;
305 currentScores[j] = 1 + previousScores[j - 1];
308 if ( maxCommonLength < currentScores[j] )
310 maxCommonLength = currentScores[j];
311 lastMaxBeginIndex = i;
316 std::swap( currentScores, previousScores );
320 delete[] currentScores;
321 delete[] previousScores;
322 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
327 if ( string1.isEmpty() && string2.isEmpty() )
333 if ( string1.length() != string2.length() )
340 QString s1( caseSensitive ? string1 : string1.toLower() );
341 QString s2( caseSensitive ? string2 : string2.toLower() );
350 const QChar *s1Char = s1.constData();
351 const QChar *s2Char = s2.constData();
353 for (
int i = 0; i < string1.length(); ++i )
355 if ( *s1Char != *s2Char )
366 if (
string.isEmpty() )
369 QString tmp =
string.toUpper();
372 QChar *char1 = tmp.data();
373 QChar *char2 = tmp.data();
375 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
377 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
378 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
379 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
380 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
387 tmp.truncate( outLen );
389 QChar *tmpChar = tmp.data();
391 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
393 switch ( ( *tmpChar ).unicode() )
399 tmp.replace( i, 1, QChar( 0x31 ) );
410 tmp.replace( i, 1, QChar( 0x32 ) );
415 tmp.replace( i, 1, QChar( 0x33 ) );
419 tmp.replace( i, 1, QChar( 0x34 ) );
424 tmp.replace( i, 1, QChar( 0x35 ) );
428 tmp.replace( i, 1, QChar( 0x36 ) );
438 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
440 if ( *char2 != *char1 )
449 tmp.truncate( outLen );
450 if ( tmp.length() < 4 )
462 QString candidateNormalized = candidate.simplified().normalized( QString::NormalizationForm_C ).toLower();
463 QString searchNormalized = search.simplified().normalized( QString::NormalizationForm_C ).toLower();
465 int candidateLength = candidateNormalized.length();
466 int searchLength = searchNormalized.length();
470 if ( candidateLength == 0 || searchLength == 0 )
473 int candidateIdx = 0;
478 bool isPreviousIndexMatching =
false;
479 bool isWordOpen =
true;
482 while ( candidateIdx < candidateLength )
484 QChar candidateChar = candidateNormalized[candidateIdx++];
485 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
488 if ( candidateIdx == 1 )
491 else if ( isCandidateCharWordEnd )
498 if ( searchIdx >= searchLength )
501 QChar searchChar = searchNormalized[searchIdx];
502 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
505 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
510 if ( isSearchCharWordEnd )
514 else if ( isPreviousIndexMatching )
522 else if ( isPreviousIndexMatching )
532 isPreviousIndexMatching =
true;
537 isPreviousIndexMatching =
false;
542 if ( searchIdx >= searchLength )
544 bool isEndOfWord = ( candidateIdx >= candidateLength ) ?
true : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
555 if ( searchIdx < searchLength )
558 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
564 QString converted = string;
568 const thread_local QRegularExpression urlRegEx(
569 u
"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s
571 const thread_local QRegularExpression protoRegEx( u
"^(?:f|ht)tps?://|file://"_s );
572 const thread_local QRegularExpression emailRegEx( u
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );
576 QRegularExpressionMatch match = urlRegEx.match( converted );
577 while ( match.hasMatch() )
580 QString url = match.captured( 1 );
581 QString protoUrl = url;
582 if ( !protoRegEx.match( protoUrl ).hasMatch() )
584 protoUrl.prepend(
"http://" );
586 QString anchor = u
"<a href=\"%1\">%2</a>"_s.arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
587 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
588 offset = match.capturedStart( 1 ) + anchor.length();
589 match = urlRegEx.match( converted, offset );
593 match = emailRegEx.match( converted );
594 while ( match.hasMatch() )
597 QString email = match.captured( 1 );
598 QString anchor = u
"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
599 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
600 offset = match.capturedStart( 1 ) + anchor.length();
601 match = emailRegEx.match( converted, offset );
612 const thread_local QRegularExpression rxUrl( u
"^(http|https|ftp|file)://\\S+$"_s );
613 return rxUrl.match(
string ).hasMatch();
619 QString converted = html;
620 converted.replace(
"<br>"_L1,
"\n"_L1 );
621 converted.replace(
"<b>"_L1,
"**"_L1 );
622 converted.replace(
"</b>"_L1,
"**"_L1 );
623 converted.replace(
"<pre>"_L1,
"\n```\n"_L1 );
624 converted.replace(
"</pre>"_L1,
"```\n"_L1 );
626 const thread_local QRegularExpression hrefRegEx( u
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );
629 QRegularExpressionMatch match = hrefRegEx.match( converted );
630 while ( match.hasMatch() )
632 QString url = match.captured( 1 ).replace(
"\""_L1, QString() );
633 url.replace(
'\'', QString() );
634 QString name = match.captured( 2 );
635 QString anchor = u
"[%1](%2)"_s.arg( name, url );
636 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
637 offset = match.capturedStart() + anchor.length();
638 match = hrefRegEx.match( converted, offset );
644QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
646 if (
string.isEmpty() || length == 0 )
650 QRegularExpression rx;
651 int delimiterLength = 0;
653 if ( !customDelimiter.isEmpty() )
655 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
656 delimiterLength = customDelimiter.length();
661 rx.setPattern( u
"[\\x{200B}\\s]"_s );
665 const QStringList lines =
string.split(
'\n' );
666 int strLength, strCurrent, strHit, lastHit;
668 for (
int i = 0; i < lines.size(); i++ )
670 const QString line = lines.at( i );
671 strLength = line.length();
672 if ( strLength <= length )
675 newstr.append( line );
676 if ( i < lines.size() - 1 )
677 newstr.append(
'\n' );
684 while ( strCurrent < strLength )
688 if ( useMaxLineLength )
691 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
692 if ( strHit == lastHit || strHit == -1 )
695 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
701 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
705 newstr.append( QStringView { line }.mid( strCurrent, strHit - strCurrent ) );
706 newstr.append(
'\n' );
707 strCurrent = strHit + delimiterLength;
711 newstr.append( QStringView { line }.mid( strCurrent ) );
712 strCurrent = strLength;
715 if ( i < lines.size() - 1 )
716 newstr.append(
'\n' );
724 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
725 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
726 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
727 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
728 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
729 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
730 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
731 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
732 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
733 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
734 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
735 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
736 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
737 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
738 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
739 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
740 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
747 const QLatin1Char backslash(
'\\' );
748 const int count =
string.count();
751 escaped.reserve( count * 2 );
752 for (
int i = 0; i < count; i++ )
754 switch (
string.at( i ).toLatin1() )
770 escaped.append( backslash );
772 escaped.append(
string.at( i ) );
779 const int charactersToTruncate =
string.length() - maxLength;
780 if ( charactersToTruncate <= 0 )
784 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
785 if ( truncateFrom <= 0 )
786 return QChar( 0x2026 );
788 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
793 if ( candidate.trimmed().isEmpty() )
796 const thread_local QRegularExpression rxWhitespace( u
"\\s+"_s );
797 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
800 for (
const QString &word : parts )
802 if ( !candidate.contains( word, sensitivity ) )
814 if ( mWholeWordOnly )
816 mRx.setPattern( u
"\\b%1\\b"_s.arg( mMatch ) );
817 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
823 QString result = input;
824 if ( !mWholeWordOnly )
826 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
830 return result.replace( mRx, mReplacement );
837 map.insert( u
"match"_s, mMatch );
838 map.insert( u
"replace"_s, mReplacement );
839 map.insert( u
"caseSensitive"_s, mCaseSensitive ? u
"1"_s : u
"0"_s );
840 map.insert( u
"wholeWord"_s, mWholeWordOnly ? u
"1"_s : u
"0"_s );
851 QString result = input;
854 result = r.process( result );
864 QDomElement propEl = doc.createElement( u
"replacement"_s );
865 QgsStringMap::const_iterator it = props.constBegin();
866 for ( ; it != props.constEnd(); ++it )
868 propEl.setAttribute( it.key(), it.value() );
870 elem.appendChild( propEl );
876 mReplacements.clear();
877 QDomNodeList nodelist = elem.elementsByTagName( u
"replacement"_s );
878 for (
int i = 0; i < nodelist.count(); i++ )
880 QDomElement replacementElem = nodelist.at( i ).toElement();
881 QDomNamedNodeMap nodeMap = replacementElem.attributes();
884 for (
int j = 0; j < nodeMap.count(); ++j )
886 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using the QgsStringReplacement objects in this collection.
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their u...
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from another string.
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ...> links.
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
As part of the API refactoring and improvements which landed in 3.0, the Processing API was substantially reworked from the 2.x version. This was done in order to allow much of the underlying Processing framework to be ported into C++.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH