22#include <QRegularExpression>
25#include <QTextBoundaryFinder>
28using namespace Qt::StringLiterals;
38 const QString in = input.normalized( QString::NormalizationForm_C );
40 out.reserve( in.size() );
43 const qsizetype n = in.size();
47 const QChar
c = in.at( i );
51 if (
c.isHighSurrogate() && i + 1 < n )
53 const QChar c2 = in.at( i + 1 );
54 if ( c2.isLowSurrogate() )
58 const QString key = in.mid( i, len ).normalized( QString::NormalizationForm_C );
62 out.append( it.value() );
74 if (
string.isEmpty() )
77 switch ( capitalization )
84 return string.toUpper();
88 return string.toLower();
92 QString temp = string;
94 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
95 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
97 wordSplitter.setPosition( 0 );
99 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem ) || wordSplitter.toNextBoundary() >= 0 )
102 letterSplitter.setPosition( wordSplitter.position() );
103 ( void ) letterSplitter.toNextBoundary();
104 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
105 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
114 static QStringList smallWords;
115 static QStringList newPhraseSeparators;
116 static QRegularExpression splitWords;
117 if ( smallWords.empty() )
119 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
120 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
121 splitWords = QRegularExpression( u
"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );
124 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
125 const QStringList parts = ( allSameCase ?
string.toLower() :
string ).split( splitWords, Qt::SkipEmptyParts );
127 bool firstWord =
true;
129 int lastWord = parts.count() - 1;
130 for (
const QString &word : std::as_const( parts ) )
132 if ( newPhraseSeparators.contains( word.trimmed() ) )
137 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
139 result += word.at( 0 ).toUpper() + word.mid( 1 );
153 result.remove(
' ' );
164 for (
int i = 0; i <
string.size(); ++i )
166 QChar ch =
string.at( i );
167 if ( ch.unicode() > 160 )
168 encoded += u
"&#%1;"_s.arg(
static_cast< int >( ch.unicode() ) );
169 else if ( ch.unicode() == 38 )
170 encoded +=
"&"_L1;
171 else if ( ch.unicode() == 60 )
172 encoded +=
"<"_L1;
173 else if ( ch.unicode() == 62 )
174 encoded +=
">"_L1;
183 int length1 = string1.length();
184 int length2 = string2.length();
187 if ( string1.isEmpty() )
191 else if ( string2.isEmpty() )
197 QString s1( caseSensitive ? string1 : string1.toLower() );
198 QString s2( caseSensitive ? string2 : string2.toLower() );
200 const QChar *s1Char = s1.constData();
201 const QChar *s2Char = s2.constData();
204 int commonPrefixLen = 0;
205 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
215 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
226 else if ( length2 == 0 )
232 if ( length1 > length2 )
235 std::swap( length1, length2 );
239 std::vector< int > col( length2 + 1, 0 );
240 std::vector< int > prevCol;
241 prevCol.reserve( length2 + 1 );
242 for (
int i = 0; i < length2 + 1; ++i )
244 prevCol.emplace_back( i );
246 const QChar *s2start = s2Char;
247 for (
int i = 0; i < length1; ++i )
251 for (
int j = 0; j < length2; ++j )
253 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
259 return prevCol[length2];
264 if ( string1.isEmpty() || string2.isEmpty() )
271 QString s1( caseSensitive ? string1 : string1.toLower() );
272 QString s2( caseSensitive ? string2 : string2.toLower() );
280 int *currentScores =
new int[s2.length()];
281 int *previousScores =
new int[s2.length()];
282 int maxCommonLength = 0;
283 int lastMaxBeginIndex = 0;
285 const QChar *s1Char = s1.constData();
286 const QChar *s2Char = s2.constData();
287 const QChar *s2Start = s2Char;
289 for (
int i = 0; i < s1.length(); ++i )
291 for (
int j = 0; j < s2.length(); ++j )
293 if ( *s1Char != *s2Char )
295 currentScores[j] = 0;
299 if ( i == 0 || j == 0 )
301 currentScores[j] = 1;
305 currentScores[j] = 1 + previousScores[j - 1];
308 if ( maxCommonLength < currentScores[j] )
310 maxCommonLength = currentScores[j];
311 lastMaxBeginIndex = i;
316 std::swap( currentScores, previousScores );
320 delete[] currentScores;
321 delete[] previousScores;
322 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
327 if ( string1.isEmpty() && string2.isEmpty() )
333 if ( string1.length() != string2.length() )
340 QString s1( caseSensitive ? string1 : string1.toLower() );
341 QString s2( caseSensitive ? string2 : string2.toLower() );
350 const QChar *s1Char = s1.constData();
351 const QChar *s2Char = s2.constData();
353 for (
int i = 0; i < string1.length(); ++i )
355 if ( *s1Char != *s2Char )
366 if (
string.isEmpty() )
369 QString tmp =
string.toUpper();
372 QChar *char1 = tmp.data();
373 QChar *char2 = tmp.data();
375 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
377 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
378 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
379 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
380 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
387 tmp.truncate( outLen );
389 QChar *tmpChar = tmp.data();
391 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
393 switch ( ( *tmpChar ).unicode() )
399 tmp.replace( i, 1, QChar( 0x31 ) );
410 tmp.replace( i, 1, QChar( 0x32 ) );
415 tmp.replace( i, 1, QChar( 0x33 ) );
419 tmp.replace( i, 1, QChar( 0x34 ) );
424 tmp.replace( i, 1, QChar( 0x35 ) );
428 tmp.replace( i, 1, QChar( 0x36 ) );
438 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
440 if ( *char2 != *char1 )
449 tmp.truncate( outLen );
450 if ( tmp.length() < 4 )
462 QString candidateNormalized = candidate.simplified().normalized( QString::NormalizationForm_C ).toLower();
463 QString searchNormalized = search.simplified().normalized( QString::NormalizationForm_C ).toLower();
465 int candidateLength = candidateNormalized.length();
466 int searchLength = searchNormalized.length();
470 if ( candidateLength == 0 || searchLength == 0 )
473 int candidateIdx = 0;
478 bool isPreviousIndexMatching =
false;
479 bool isWordOpen =
true;
482 while ( candidateIdx < candidateLength )
484 QChar candidateChar = candidateNormalized[candidateIdx++];
485 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
488 if ( candidateIdx == 1 )
491 else if ( isCandidateCharWordEnd )
498 if ( searchIdx >= searchLength )
501 QChar searchChar = searchNormalized[searchIdx];
502 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
505 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
510 if ( isSearchCharWordEnd )
514 else if ( isPreviousIndexMatching )
522 else if ( isPreviousIndexMatching )
532 isPreviousIndexMatching =
true;
537 isPreviousIndexMatching =
false;
542 if ( searchIdx >= searchLength )
544 bool isEndOfWord = ( candidateIdx >= candidateLength ) ?
true : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
555 if ( searchIdx < searchLength )
558 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
564 QString converted = string;
568 const thread_local QRegularExpression urlRegEx(
569 u
"((?:(?:['\"\\(]?http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s
571 const thread_local QRegularExpression groupedStringRegEx( u
"^(['\"\\(]+)(.*?)(?:['\")]+)"_s );
572 const thread_local QRegularExpression protoRegEx( u
"^(?:f|ht)tps?://|file://"_s );
573 const thread_local QRegularExpression emailRegEx( u
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );
575 std::size_t offset = 0;
577 QRegularExpressionMatch match = urlRegEx.match( converted );
578 while ( match.hasMatch() )
581 QString url = match.captured( 1 );
582 std::size_t urlStart = match.capturedStart( 1 );
584 QString protoUrl = url;
585 const QRegularExpressionMatch groupedStringMatch = groupedStringRegEx.match( protoUrl );
586 if ( groupedStringMatch.hasMatch() )
588 url = groupedStringMatch.captured( 2 );
590 urlStart += groupedStringMatch.capturedLength( 1 );
592 if ( !protoRegEx.match( protoUrl ).hasMatch() )
594 protoUrl.prepend(
"http://" );
596 QString anchor = u
"<a href=\"%1\">%2</a>"_s.arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
597 converted.replace( urlStart, url.length(), anchor );
598 offset = urlStart + anchor.length();
599 match = urlRegEx.match( converted, offset );
602 match = emailRegEx.match( converted );
603 while ( match.hasMatch() )
606 QString email = match.captured( 1 );
607 QString anchor = u
"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
608 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
609 offset = match.capturedStart( 1 ) + anchor.length();
610 match = emailRegEx.match( converted, offset );
621 const thread_local QRegularExpression rxUrl( u
"^(http|https|ftp|file)://\\S+$"_s );
622 return rxUrl.match(
string ).hasMatch();
628 QString converted = html;
629 converted.replace(
"<br>"_L1,
"\n"_L1 );
630 converted.replace(
"<b>"_L1,
"**"_L1 );
631 converted.replace(
"</b>"_L1,
"**"_L1 );
632 converted.replace(
"<pre>"_L1,
"\n```\n"_L1 );
633 converted.replace(
"</pre>"_L1,
"```\n"_L1 );
635 const thread_local QRegularExpression hrefRegEx( u
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );
638 QRegularExpressionMatch match = hrefRegEx.match( converted );
639 while ( match.hasMatch() )
641 QString url = match.captured( 1 ).replace(
"\""_L1, QString() );
642 url.replace(
'\'', QString() );
643 QString name = match.captured( 2 );
644 QString anchor = u
"[%1](%2)"_s.arg( name, url );
645 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
646 offset = match.capturedStart() + anchor.length();
647 match = hrefRegEx.match( converted, offset );
653QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
655 if (
string.isEmpty() || length == 0 )
659 QRegularExpression rx;
660 int delimiterLength = 0;
662 if ( !customDelimiter.isEmpty() )
664 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
665 delimiterLength = customDelimiter.length();
670 rx.setPattern( u
"[\\x{200B}\\s]"_s );
674 const QStringList lines =
string.split(
'\n' );
675 int strLength, strCurrent, strHit, lastHit;
677 for (
int i = 0; i < lines.size(); i++ )
679 const QString line = lines.at( i );
680 strLength = line.length();
681 if ( strLength <= length )
684 newstr.append( line );
685 if ( i < lines.size() - 1 )
686 newstr.append(
'\n' );
693 while ( strCurrent < strLength )
697 if ( useMaxLineLength )
700 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
701 if ( strHit == lastHit || strHit == -1 )
704 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
710 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
714 newstr.append( QStringView { line }.mid( strCurrent, strHit - strCurrent ) );
715 newstr.append(
'\n' );
716 strCurrent = strHit + delimiterLength;
720 newstr.append( QStringView { line }.mid( strCurrent ) );
721 strCurrent = strLength;
724 if ( i < lines.size() - 1 )
725 newstr.append(
'\n' );
733 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
734 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
735 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
736 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
737 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
738 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
739 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
740 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
741 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
742 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
743 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
744 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
745 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
746 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
747 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
748 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
749 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
756 const QLatin1Char backslash(
'\\' );
757 const int count =
string.count();
760 escaped.reserve( count * 2 );
761 for (
int i = 0; i < count; i++ )
763 switch (
string.at( i ).toLatin1() )
779 escaped.append( backslash );
781 escaped.append(
string.at( i ) );
788 const int charactersToTruncate =
string.length() - maxLength;
789 if ( charactersToTruncate <= 0 )
793 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
794 if ( truncateFrom <= 0 )
795 return QChar( 0x2026 );
797 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
802 if ( candidate.trimmed().isEmpty() )
805 const thread_local QRegularExpression rxWhitespace( u
"\\s+"_s );
806 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
809 for (
const QString &word : parts )
811 if ( !candidate.contains( word, sensitivity ) )
823 if ( mWholeWordOnly )
825 mRx.setPattern( u
"\\b%1\\b"_s.arg( mMatch ) );
826 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
832 QString result = input;
833 if ( !mWholeWordOnly )
835 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
839 return result.replace( mRx, mReplacement );
846 map.insert( u
"match"_s, mMatch );
847 map.insert( u
"replace"_s, mReplacement );
848 map.insert( u
"caseSensitive"_s, mCaseSensitive ? u
"1"_s : u
"0"_s );
849 map.insert( u
"wholeWord"_s, mWholeWordOnly ? u
"1"_s : u
"0"_s );
860 QString result = input;
863 result = r.process( result );
873 QDomElement propEl = doc.createElement( u
"replacement"_s );
874 QgsStringMap::const_iterator it = props.constBegin();
875 for ( ; it != props.constEnd(); ++it )
877 propEl.setAttribute( it.key(), it.value() );
879 elem.appendChild( propEl );
885 mReplacements.clear();
886 QDomNodeList nodelist = elem.elementsByTagName( u
"replacement"_s );
887 for (
int i = 0; i < nodelist.count(); i++ )
889 QDomElement replacementElem = nodelist.at( i ).toElement();
890 QDomNamedNodeMap nodeMap = replacementElem.attributes();
893 for (
int j = 0; j < nodeMap.count(); ++j )
895 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
QString replacement() const
Returns the string to replace matches with.
bool caseSensitive() const
Returns true if match is case sensitive.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QString match() const
Returns the string matched by this object.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their u...
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
As part of the API refactoring and improvements which landed in QGIS 3.0, the Processing API was substantially reworked from the 2.x version. This was done in order to allow much of the underlying Processing framework to be ported into C++.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH