22#include <QRegularExpression>
25#include <QTextBoundaryFinder>
28using namespace Qt::StringLiterals;
38 const QString in = input.normalized( QString::NormalizationForm_C );
40 out.reserve( in.size() );
43 const qsizetype n = in.size();
47 const QChar
c = in.at( i );
51 if (
c.isHighSurrogate() && i + 1 < n )
53 const QChar c2 = in.at( i + 1 );
54 if ( c2.isLowSurrogate() )
58 const QString key = in.mid( i, len ).normalized( QString::NormalizationForm_C );
62 out.append( it.value() );
74 if (
string.isEmpty() )
77 switch ( capitalization )
84 return string.toUpper();
88 return string.toLower();
92 QString temp = string;
94 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
95 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
97 wordSplitter.setPosition( 0 );
99 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
100 || wordSplitter.toNextBoundary() >= 0 )
103 letterSplitter.setPosition( wordSplitter.position() );
104 ( void )letterSplitter.toNextBoundary();
105 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
106 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
115 static QStringList smallWords;
116 static QStringList newPhraseSeparators;
117 static QRegularExpression splitWords;
118 if ( smallWords.empty() )
120 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
121 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
122 splitWords = QRegularExpression( u
"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );
125 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
126 const QStringList parts = ( allSameCase ?
string.toLower() :
string ).split( splitWords, Qt::SkipEmptyParts );
128 bool firstWord =
true;
130 int lastWord = parts.count() - 1;
131 for (
const QString &word : std::as_const( parts ) )
133 if ( newPhraseSeparators.contains( word.trimmed() ) )
138 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
140 result += word.at( 0 ).toUpper() + word.mid( 1 );
154 result.remove(
' ' );
165 for (
int i = 0; i <
string.size(); ++i )
167 QChar ch =
string.at( i );
168 if ( ch.unicode() > 160 )
169 encoded += u
"&#%1;"_s.arg(
static_cast< int >( ch.unicode() ) );
170 else if ( ch.unicode() == 38 )
171 encoded +=
"&"_L1;
172 else if ( ch.unicode() == 60 )
173 encoded +=
"<"_L1;
174 else if ( ch.unicode() == 62 )
175 encoded +=
">"_L1;
184 int length1 = string1.length();
185 int length2 = string2.length();
188 if ( string1.isEmpty() )
192 else if ( string2.isEmpty() )
198 QString s1( caseSensitive ? string1 : string1.toLower() );
199 QString s2( caseSensitive ? string2 : string2.toLower() );
201 const QChar *s1Char = s1.constData();
202 const QChar *s2Char = s2.constData();
205 int commonPrefixLen = 0;
206 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
216 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
227 else if ( length2 == 0 )
233 if ( length1 > length2 )
236 std::swap( length1, length2 );
240 std::vector< int > col( length2 + 1, 0 );
241 std::vector< int > prevCol;
242 prevCol.reserve( length2 + 1 );
243 for (
int i = 0; i < length2 + 1; ++i )
245 prevCol.emplace_back( i );
247 const QChar *s2start = s2Char;
248 for (
int i = 0; i < length1; ++i )
252 for (
int j = 0; j < length2; ++j )
254 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
260 return prevCol[length2];
265 if ( string1.isEmpty() || string2.isEmpty() )
272 QString s1( caseSensitive ? string1 : string1.toLower() );
273 QString s2( caseSensitive ? string2 : string2.toLower() );
281 int *currentScores =
new int [ s2.length()];
282 int *previousScores =
new int [ s2.length()];
283 int maxCommonLength = 0;
284 int lastMaxBeginIndex = 0;
286 const QChar *s1Char = s1.constData();
287 const QChar *s2Char = s2.constData();
288 const QChar *s2Start = s2Char;
290 for (
int i = 0; i < s1.length(); ++i )
292 for (
int j = 0; j < s2.length(); ++j )
294 if ( *s1Char != *s2Char )
296 currentScores[j] = 0;
300 if ( i == 0 || j == 0 )
302 currentScores[j] = 1;
306 currentScores[j] = 1 + previousScores[j - 1];
309 if ( maxCommonLength < currentScores[j] )
311 maxCommonLength = currentScores[j];
312 lastMaxBeginIndex = i;
317 std::swap( currentScores, previousScores );
321 delete [] currentScores;
322 delete [] previousScores;
323 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
328 if ( string1.isEmpty() && string2.isEmpty() )
334 if ( string1.length() != string2.length() )
341 QString s1( caseSensitive ? string1 : string1.toLower() );
342 QString s2( caseSensitive ? string2 : string2.toLower() );
351 const QChar *s1Char = s1.constData();
352 const QChar *s2Char = s2.constData();
354 for (
int i = 0; i < string1.length(); ++i )
356 if ( *s1Char != *s2Char )
367 if (
string.isEmpty() )
370 QString tmp =
string.toUpper();
373 QChar *char1 = tmp.data();
374 QChar *char2 = tmp.data();
376 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
378 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
379 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
380 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
381 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
388 tmp.truncate( outLen );
390 QChar *tmpChar = tmp.data();
392 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
394 switch ( ( *tmpChar ).unicode() )
400 tmp.replace( i, 1, QChar( 0x31 ) );
411 tmp.replace( i, 1, QChar( 0x32 ) );
416 tmp.replace( i, 1, QChar( 0x33 ) );
420 tmp.replace( i, 1, QChar( 0x34 ) );
425 tmp.replace( i, 1, QChar( 0x35 ) );
429 tmp.replace( i, 1, QChar( 0x36 ) );
439 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
441 if ( *char2 != *char1 )
450 tmp.truncate( outLen );
451 if ( tmp.length() < 4 )
463 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
464 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
466 int candidateLength = candidateNormalized.length();
467 int searchLength = searchNormalized.length();
471 if ( candidateLength == 0 || searchLength == 0 )
474 int candidateIdx = 0;
479 bool isPreviousIndexMatching =
false;
480 bool isWordOpen =
true;
483 while ( candidateIdx < candidateLength )
485 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
486 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
489 if ( candidateIdx == 1 )
492 else if ( isCandidateCharWordEnd )
499 if ( searchIdx >= searchLength )
502 QChar searchChar = searchNormalized[ searchIdx ];
503 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
506 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
511 if ( isSearchCharWordEnd )
515 else if ( isPreviousIndexMatching )
523 else if ( isPreviousIndexMatching )
533 isPreviousIndexMatching =
true;
538 isPreviousIndexMatching =
false;
543 if ( searchIdx >= searchLength )
545 bool isEndOfWord = ( candidateIdx >= candidateLength )
547 : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
558 if ( searchIdx < searchLength )
561 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
567 QString converted = string;
571 const thread_local QRegularExpression urlRegEx( u
"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s );
572 const thread_local QRegularExpression protoRegEx( u
"^(?:f|ht)tps?://|file://"_s );
573 const thread_local QRegularExpression emailRegEx( u
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );
577 QRegularExpressionMatch match = urlRegEx.match( converted );
578 while ( match.hasMatch() )
581 QString url = match.captured( 1 );
582 QString protoUrl = url;
583 if ( !protoRegEx.match( protoUrl ).hasMatch() )
585 protoUrl.prepend(
"http://" );
587 QString anchor = u
"<a href=\"%1\">%2</a>"_s.arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
588 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
589 offset = match.capturedStart( 1 ) + anchor.length();
590 match = urlRegEx.match( converted, offset );
594 match = emailRegEx.match( converted );
595 while ( match.hasMatch() )
598 QString email = match.captured( 1 );
599 QString anchor = u
"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
600 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
601 offset = match.capturedStart( 1 ) + anchor.length();
602 match = emailRegEx.match( converted, offset );
613 const thread_local QRegularExpression rxUrl( u
"^(http|https|ftp|file)://\\S+$"_s );
614 return rxUrl.match(
string ).hasMatch();
620 QString converted = html;
621 converted.replace(
"<br>"_L1,
"\n"_L1 );
622 converted.replace(
"<b>"_L1,
"**"_L1 );
623 converted.replace(
"</b>"_L1,
"**"_L1 );
624 converted.replace(
"<pre>"_L1,
"\n```\n"_L1 );
625 converted.replace(
"</pre>"_L1,
"```\n"_L1 );
627 const thread_local QRegularExpression hrefRegEx( u
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );
630 QRegularExpressionMatch match = hrefRegEx.match( converted );
631 while ( match.hasMatch() )
633 QString url = match.captured( 1 ).replace(
"\""_L1, QString() );
634 url.replace(
'\'', QString() );
635 QString name = match.captured( 2 );
636 QString anchor = u
"[%1](%2)"_s.arg( name, url );
637 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
638 offset = match.capturedStart() + anchor.length();
639 match = hrefRegEx.match( converted, offset );
645QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
647 if (
string.isEmpty() || length == 0 )
651 QRegularExpression rx;
652 int delimiterLength = 0;
654 if ( !customDelimiter.isEmpty() )
656 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
657 delimiterLength = customDelimiter.length();
662 rx.setPattern( u
"[\\x{200B}\\s]"_s );
666 const QStringList lines =
string.split(
'\n' );
667 int strLength, strCurrent, strHit, lastHit;
669 for (
int i = 0; i < lines.size(); i++ )
671 const QString line = lines.at( i );
672 strLength = line.length();
673 if ( strLength <= length )
676 newstr.append( line );
677 if ( i < lines.size() - 1 )
678 newstr.append(
'\n' );
685 while ( strCurrent < strLength )
689 if ( useMaxLineLength )
692 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
693 if ( strHit == lastHit || strHit == -1 )
696 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
702 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
706 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
707 newstr.append(
'\n' );
708 strCurrent = strHit + delimiterLength;
712 newstr.append( QStringView {line} .mid( strCurrent ) );
713 strCurrent = strLength;
716 if ( i < lines.size() - 1 )
717 newstr.append(
'\n' );
725 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
726 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
727 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
728 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
729 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
730 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
731 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
732 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
733 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
734 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
735 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
736 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
737 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
738 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
739 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
740 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
741 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
748 const QLatin1Char backslash(
'\\' );
749 const int count =
string.count();
752 escaped.reserve( count * 2 );
753 for (
int i = 0; i < count; i++ )
755 switch (
string.at( i ).toLatin1() )
771 escaped.append( backslash );
773 escaped.append(
string.at( i ) );
780 const int charactersToTruncate =
string.length() - maxLength;
781 if ( charactersToTruncate <= 0 )
785 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
786 if ( truncateFrom <= 0 )
787 return QChar( 0x2026 );
789 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
794 if ( candidate.trimmed().isEmpty() )
797 const thread_local QRegularExpression rxWhitespace( u
"\\s+"_s );
798 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
801 for (
const QString &word : parts )
803 if ( !candidate.contains( word, sensitivity ) )
815 if ( mWholeWordOnly )
817 mRx.setPattern( u
"\\b%1\\b"_s.arg( mMatch ) );
818 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
824 QString result = input;
825 if ( !mWholeWordOnly )
827 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
831 return result.replace( mRx, mReplacement );
838 map.insert( u
"match"_s, mMatch );
839 map.insert( u
"replace"_s, mReplacement );
840 map.insert( u
"caseSensitive"_s, mCaseSensitive ? u
"1"_s : u
"0"_s );
841 map.insert( u
"wholeWord"_s, mWholeWordOnly ? u
"1"_s : u
"0"_s );
849 properties.value( u
"caseSensitive"_s, u
"0"_s ) ==
"1"_L1,
850 properties.value( u
"wholeWord"_s, u
"0"_s ) ==
"1"_L1 );
855 QString result = input;
858 result = r.process( result );
868 QDomElement propEl = doc.createElement( u
"replacement"_s );
869 QgsStringMap::const_iterator it = props.constBegin();
870 for ( ; it != props.constEnd(); ++it )
872 propEl.setAttribute( it.key(), it.value() );
874 elem.appendChild( propEl );
880 mReplacements.clear();
881 QDomNodeList nodelist = elem.elementsByTagName( u
"replacement"_s );
882 for (
int i = 0; i < nodelist.count(); i++ )
884 QDomElement replacementElem = nodelist.at( i ).toElement();
885 QDomNamedNodeMap nodeMap = replacementElem.attributes();
888 for (
int j = 0; j < nodeMap.count(); ++j )
890 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only.
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using the QgsStringReplacement objects in the collection.
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string in which characters that have a vertical presentation form are replaced by that form.
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their unaccented equivalents.
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from another string, regardless of their order.
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ...> links.
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
As part of the API refactoring and improvements which landed in QGIS 3.0, the Processing API was substantially reworked from the 2.x version. This was done in order to allow much of the underlying Processing framework to be ported into C++.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH