22#include <QRegularExpression>
24#include <QTextBoundaryFinder>
35 const QString in = input.normalized( QString::NormalizationForm_C );
37 out.reserve( in.size() );
40 const qsizetype n = in.size();
44 const QChar
c = in.at( i );
48 if (
c.isHighSurrogate() && i + 1 < n )
50 const QChar c2 = in.at( i + 1 );
51 if ( c2.isLowSurrogate() )
55 const QString key = in.mid( i, len ).normalized( QString::NormalizationForm_C );
59 out.append( it.value() );
71 if (
string.isEmpty() )
74 switch ( capitalization )
81 return string.toUpper();
85 return string.toLower();
89 QString temp = string;
91 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
92 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
94 wordSplitter.setPosition( 0 );
96 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
97 || wordSplitter.toNextBoundary() >= 0 )
100 letterSplitter.setPosition( wordSplitter.position() );
101 ( void )letterSplitter.toNextBoundary();
102 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
103 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
112 static QStringList smallWords;
113 static QStringList newPhraseSeparators;
114 static QRegularExpression splitWords;
115 if ( smallWords.empty() )
117 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
118 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
119 splitWords = QRegularExpression( u
"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );
122 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
123 const QStringList parts = ( allSameCase ?
string.toLower() :
string ).split( splitWords, Qt::SkipEmptyParts );
125 bool firstWord =
true;
127 int lastWord = parts.count() - 1;
128 for (
const QString &word : std::as_const( parts ) )
130 if ( newPhraseSeparators.contains( word.trimmed() ) )
135 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
137 result += word.at( 0 ).toUpper() + word.mid( 1 );
151 result.remove(
' ' );
162 for (
int i = 0; i <
string.size(); ++i )
164 QChar ch =
string.at( i );
165 if ( ch.unicode() > 160 )
166 encoded += u
"&#%1;"_s.arg(
static_cast< int >( ch.unicode() ) );
167 else if ( ch.unicode() == 38 )
168 encoded +=
"&"_L1;
169 else if ( ch.unicode() == 60 )
170 encoded +=
"<"_L1;
171 else if ( ch.unicode() == 62 )
172 encoded +=
">"_L1;
181 int length1 = string1.length();
182 int length2 = string2.length();
185 if ( string1.isEmpty() )
189 else if ( string2.isEmpty() )
195 QString s1( caseSensitive ? string1 : string1.toLower() );
196 QString s2( caseSensitive ? string2 : string2.toLower() );
198 const QChar *s1Char = s1.constData();
199 const QChar *s2Char = s2.constData();
202 int commonPrefixLen = 0;
203 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
213 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
224 else if ( length2 == 0 )
230 if ( length1 > length2 )
233 std::swap( length1, length2 );
237 std::vector< int > col( length2 + 1, 0 );
238 std::vector< int > prevCol;
239 prevCol.reserve( length2 + 1 );
240 for (
int i = 0; i < length2 + 1; ++i )
242 prevCol.emplace_back( i );
244 const QChar *s2start = s2Char;
245 for (
int i = 0; i < length1; ++i )
249 for (
int j = 0; j < length2; ++j )
251 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
257 return prevCol[length2];
262 if ( string1.isEmpty() || string2.isEmpty() )
269 QString s1( caseSensitive ? string1 : string1.toLower() );
270 QString s2( caseSensitive ? string2 : string2.toLower() );
278 int *currentScores =
new int [ s2.length()];
279 int *previousScores =
new int [ s2.length()];
280 int maxCommonLength = 0;
281 int lastMaxBeginIndex = 0;
283 const QChar *s1Char = s1.constData();
284 const QChar *s2Char = s2.constData();
285 const QChar *s2Start = s2Char;
287 for (
int i = 0; i < s1.length(); ++i )
289 for (
int j = 0; j < s2.length(); ++j )
291 if ( *s1Char != *s2Char )
293 currentScores[j] = 0;
297 if ( i == 0 || j == 0 )
299 currentScores[j] = 1;
303 currentScores[j] = 1 + previousScores[j - 1];
306 if ( maxCommonLength < currentScores[j] )
308 maxCommonLength = currentScores[j];
309 lastMaxBeginIndex = i;
314 std::swap( currentScores, previousScores );
318 delete [] currentScores;
319 delete [] previousScores;
320 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
325 if ( string1.isEmpty() && string2.isEmpty() )
331 if ( string1.length() != string2.length() )
338 QString s1( caseSensitive ? string1 : string1.toLower() );
339 QString s2( caseSensitive ? string2 : string2.toLower() );
348 const QChar *s1Char = s1.constData();
349 const QChar *s2Char = s2.constData();
351 for (
int i = 0; i < string1.length(); ++i )
353 if ( *s1Char != *s2Char )
364 if (
string.isEmpty() )
367 QString tmp =
string.toUpper();
370 QChar *char1 = tmp.data();
371 QChar *char2 = tmp.data();
373 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
375 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
376 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
377 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
378 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
385 tmp.truncate( outLen );
387 QChar *tmpChar = tmp.data();
389 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
391 switch ( ( *tmpChar ).unicode() )
397 tmp.replace( i, 1, QChar( 0x31 ) );
408 tmp.replace( i, 1, QChar( 0x32 ) );
413 tmp.replace( i, 1, QChar( 0x33 ) );
417 tmp.replace( i, 1, QChar( 0x34 ) );
422 tmp.replace( i, 1, QChar( 0x35 ) );
426 tmp.replace( i, 1, QChar( 0x36 ) );
436 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
438 if ( *char2 != *char1 )
447 tmp.truncate( outLen );
448 if ( tmp.length() < 4 )
460 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
461 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
463 int candidateLength = candidateNormalized.length();
464 int searchLength = searchNormalized.length();
468 if ( candidateLength == 0 || searchLength == 0 )
471 int candidateIdx = 0;
476 bool isPreviousIndexMatching =
false;
477 bool isWordOpen =
true;
480 while ( candidateIdx < candidateLength )
482 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
483 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
486 if ( candidateIdx == 1 )
489 else if ( isCandidateCharWordEnd )
496 if ( searchIdx >= searchLength )
499 QChar searchChar = searchNormalized[ searchIdx ];
500 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
503 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
508 if ( isSearchCharWordEnd )
512 else if ( isPreviousIndexMatching )
520 else if ( isPreviousIndexMatching )
530 isPreviousIndexMatching =
true;
535 isPreviousIndexMatching =
false;
540 if ( searchIdx >= searchLength )
542 bool isEndOfWord = ( candidateIdx >= candidateLength )
544 : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
555 if ( searchIdx < searchLength )
558 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
564 QString converted = string;
568 const thread_local QRegularExpression urlRegEx( u
"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s );
569 const thread_local QRegularExpression protoRegEx( u
"^(?:f|ht)tps?://|file://"_s );
570 const thread_local QRegularExpression emailRegEx( u
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );
574 QRegularExpressionMatch match = urlRegEx.match( converted );
575 while ( match.hasMatch() )
578 QString url = match.captured( 1 );
579 QString protoUrl = url;
580 if ( !protoRegEx.match( protoUrl ).hasMatch() )
582 protoUrl.prepend(
"http://" );
584 QString anchor = u
"<a href=\"%1\">%2</a>"_s.arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
585 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
586 offset = match.capturedStart( 1 ) + anchor.length();
587 match = urlRegEx.match( converted, offset );
591 match = emailRegEx.match( converted );
592 while ( match.hasMatch() )
595 QString email = match.captured( 1 );
596 QString anchor = u
"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
597 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
598 offset = match.capturedStart( 1 ) + anchor.length();
599 match = emailRegEx.match( converted, offset );
610 const thread_local QRegularExpression rxUrl( u
"^(http|https|ftp|file)://\\S+$"_s );
611 return rxUrl.match(
string ).hasMatch();
617 QString converted = html;
618 converted.replace(
"<br>"_L1,
"\n"_L1 );
619 converted.replace(
"<b>"_L1,
"**"_L1 );
620 converted.replace(
"</b>"_L1,
"**"_L1 );
621 converted.replace(
"<pre>"_L1,
"\n```\n"_L1 );
622 converted.replace(
"</pre>"_L1,
"```\n"_L1 );
624 const thread_local QRegularExpression hrefRegEx( u
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );
627 QRegularExpressionMatch match = hrefRegEx.match( converted );
628 while ( match.hasMatch() )
630 QString url = match.captured( 1 ).replace(
"\""_L1, QString() );
631 url.replace(
'\'', QString() );
632 QString name = match.captured( 2 );
633 QString anchor = u
"[%1](%2)"_s.arg( name, url );
634 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
635 offset = match.capturedStart() + anchor.length();
636 match = hrefRegEx.match( converted, offset );
642QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
644 if (
string.isEmpty() || length == 0 )
648 QRegularExpression rx;
649 int delimiterLength = 0;
651 if ( !customDelimiter.isEmpty() )
653 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
654 delimiterLength = customDelimiter.length();
659 rx.setPattern( u
"[\\x{200B}\\s]"_s );
663 const QStringList lines =
string.split(
'\n' );
664 int strLength, strCurrent, strHit, lastHit;
666 for (
int i = 0; i < lines.size(); i++ )
668 const QString line = lines.at( i );
669 strLength = line.length();
670 if ( strLength <= length )
673 newstr.append( line );
674 if ( i < lines.size() - 1 )
675 newstr.append(
'\n' );
682 while ( strCurrent < strLength )
686 if ( useMaxLineLength )
689 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
690 if ( strHit == lastHit || strHit == -1 )
693 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
699 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
703 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
704 newstr.append(
'\n' );
705 strCurrent = strHit + delimiterLength;
709 newstr.append( QStringView {line} .mid( strCurrent ) );
710 strCurrent = strLength;
713 if ( i < lines.size() - 1 )
714 newstr.append(
'\n' );
722 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
723 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
724 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
725 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
726 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
727 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
728 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
729 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
730 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
731 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
732 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
733 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
734 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
735 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
736 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
737 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
738 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
745 const QLatin1Char backslash(
'\\' );
746 const int count =
string.count();
749 escaped.reserve( count * 2 );
750 for (
int i = 0; i < count; i++ )
752 switch (
string.at( i ).toLatin1() )
768 escaped.append( backslash );
770 escaped.append(
string.at( i ) );
777 const int charactersToTruncate =
string.length() - maxLength;
778 if ( charactersToTruncate <= 0 )
782 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
783 if ( truncateFrom <= 0 )
784 return QChar( 0x2026 );
786 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
791 if ( candidate.trimmed().isEmpty() )
794 const thread_local QRegularExpression rxWhitespace( u
"\\s+"_s );
795 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
798 for (
const QString &word : parts )
800 if ( !candidate.contains( word, sensitivity ) )
812 if ( mWholeWordOnly )
814 mRx.setPattern( u
"\\b%1\\b"_s.arg( mMatch ) );
815 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
821 QString result = input;
822 if ( !mWholeWordOnly )
824 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
828 return result.replace( mRx, mReplacement );
835 map.insert( u
"match"_s, mMatch );
836 map.insert( u
"replace"_s, mReplacement );
837 map.insert( u
"caseSensitive"_s, mCaseSensitive ? u
"1"_s : u
"0"_s );
838 map.insert( u
"wholeWord"_s, mWholeWordOnly ? u
"1"_s : u
"0"_s );
846 properties.value( u
"caseSensitive"_s, u
"0"_s ) ==
"1"_L1,
847 properties.value( u
"wholeWord"_s, u
"0"_s ) ==
"1"_L1 );
852 QString result = input;
855 result = r.process( result );
865 QDomElement propEl = doc.createElement( u
"replacement"_s );
866 QgsStringMap::const_iterator it = props.constBegin();
867 for ( ; it != props.constEnd(); ++it )
869 propEl.setAttribute( it.key(), it.value() );
871 elem.appendChild( propEl );
877 mReplacements.clear();
878 QDomNodeList nodelist = elem.elementsByTagName( u
"replacement"_s );
879 for (
int i = 0; i < nodelist.count(); i++ )
881 QDomElement replacementElem = nodelist.at( i ).toElement();
882 QDomNamedNodeMap nodeMap = replacementElem.attributes();
885 for (
int j = 0; j < nodeMap.count(); ++j )
887 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their u...
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
As part of the API refactoring and improvements which landed in QGIS 3.0, the Processing API was substantially reworked from the 2.x version. This was done in order to allow much of the underlying Processing framework to be ported into C++.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH