20#include <QTextBoundaryFinder>
21#include <QRegularExpression>
26 if (
string.isEmpty() )
29 switch ( capitalization )
36 return string.toUpper();
40 return string.toLower();
44 QString temp = string;
46 QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word,
string.constData(),
string.length(),
nullptr, 0 );
47 QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme,
string.constData(),
string.length(),
nullptr, 0 );
49 wordSplitter.setPosition( 0 );
51 while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
52 || wordSplitter.toNextBoundary() >= 0 )
55 letterSplitter.setPosition( wordSplitter.position() );
56 letterSplitter.toNextBoundary();
57 QString substr =
string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
58 temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
67 static QStringList smallWords;
68 static QStringList newPhraseSeparators;
69 static QRegularExpression splitWords;
70 if ( smallWords.empty() )
72 smallWords = QObject::tr(
"a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split(
'|' );
73 newPhraseSeparators = QObject::tr(
".|:" ).split(
'|' );
74 splitWords = QRegularExpression( QStringLiteral(
"\\b" ), QRegularExpression::UseUnicodePropertiesOption );
77 const bool allSameCase =
string.toLower() ==
string ||
string.toUpper() == string;
78 const QStringList parts = ( allSameCase ?
string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
80 bool firstWord =
true;
82 int lastWord = parts.count() - 1;
83 for (
const QString &word : std::as_const( parts ) )
85 if ( newPhraseSeparators.contains( word.trimmed() ) )
90 else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
92 result += word.at( 0 ).toUpper() + word.mid( 1 );
106 result.remove(
' ' );
117 for (
int i = 0; i <
string.size(); ++i )
119 QChar ch =
string.at( i );
120 if ( ch.unicode() > 160 )
121 encoded += QStringLiteral(
"&#%1;" ).arg(
static_cast< int >( ch.unicode() ) );
122 else if ( ch.unicode() == 38 )
123 encoded += QLatin1String(
"&amp;" );
124 else if ( ch.unicode() == 60 )
125 encoded += QLatin1String(
"&lt;" );
126 else if ( ch.unicode() == 62 )
127 encoded += QLatin1String(
"&gt;" );
136 int length1 = string1.length();
137 int length2 = string2.length();
140 if ( string1.isEmpty() )
144 else if ( string2.isEmpty() )
150 QString s1( caseSensitive ? string1 : string1.toLower() );
151 QString s2( caseSensitive ? string2 : string2.toLower() );
153 const QChar *s1Char = s1.constData();
154 const QChar *s2Char = s2.constData();
157 int commonPrefixLen = 0;
158 while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
168 while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
179 else if ( length2 == 0 )
185 if ( length1 > length2 )
188 std::swap( length1, length2 );
192 std::vector< int > col( length2 + 1, 0 );
193 std::vector< int > prevCol;
194 prevCol.reserve( length2 + 1 );
195 for (
int i = 0; i < length2 + 1; ++i )
197 prevCol.emplace_back( i );
199 const QChar *s2start = s2Char;
200 for (
int i = 0; i < length1; ++i )
204 for (
int j = 0; j < length2; ++j )
206 col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
212 return prevCol[length2];
217 if ( string1.isEmpty() || string2.isEmpty() )
224 QString s1( caseSensitive ? string1 : string1.toLower() );
225 QString s2( caseSensitive ? string2 : string2.toLower() );
233 int *currentScores =
new int [ s2.length()];
234 int *previousScores =
new int [ s2.length()];
235 int maxCommonLength = 0;
236 int lastMaxBeginIndex = 0;
238 const QChar *s1Char = s1.constData();
239 const QChar *s2Char = s2.constData();
240 const QChar *s2Start = s2Char;
242 for (
int i = 0; i < s1.length(); ++i )
244 for (
int j = 0; j < s2.length(); ++j )
246 if ( *s1Char != *s2Char )
248 currentScores[j] = 0;
252 if ( i == 0 || j == 0 )
254 currentScores[j] = 1;
258 currentScores[j] = 1 + previousScores[j - 1];
261 if ( maxCommonLength < currentScores[j] )
263 maxCommonLength = currentScores[j];
264 lastMaxBeginIndex = i;
269 std::swap( currentScores, previousScores );
273 delete [] currentScores;
274 delete [] previousScores;
275 return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
280 if ( string1.isEmpty() && string2.isEmpty() )
286 if ( string1.length() != string2.length() )
293 QString s1( caseSensitive ? string1 : string1.toLower() );
294 QString s2( caseSensitive ? string2 : string2.toLower() );
303 const QChar *s1Char = s1.constData();
304 const QChar *s2Char = s2.constData();
306 for (
int i = 0; i < string1.length(); ++i )
308 if ( *s1Char != *s2Char )
319 if (
string.isEmpty() )
322 QString tmp =
string.toUpper();
325 QChar *char1 = tmp.data();
326 QChar *char2 = tmp.data();
328 for (
int i = 0; i < tmp.length(); ++i, ++char2 )
330 if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
331 && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
332 && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
333 && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
340 tmp.truncate( outLen );
342 QChar *tmpChar = tmp.data();
344 for (
int i = 1; i < tmp.length(); ++i, ++tmpChar )
346 switch ( ( *tmpChar ).unicode() )
352 tmp.replace( i, 1, QChar( 0x31 ) );
363 tmp.replace( i, 1, QChar( 0x32 ) );
368 tmp.replace( i, 1, QChar( 0x33 ) );
372 tmp.replace( i, 1, QChar( 0x34 ) );
377 tmp.replace( i, 1, QChar( 0x35 ) );
381 tmp.replace( i, 1, QChar( 0x36 ) );
391 for (
int i = 1; i < tmp.length(); ++i, ++char2 )
393 if ( *char2 != *char1 )
402 tmp.truncate( outLen );
403 if ( tmp.length() < 4 )
415 QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
416 QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
418 int candidateLength = candidateNormalized.length();
419 int searchLength = searchNormalized.length();
423 if ( candidateLength == 0 || searchLength == 0 )
426 int candidateIdx = 0;
431 bool isPreviousIndexMatching =
false;
432 bool isWordOpen =
true;
435 while ( candidateIdx < candidateLength )
437 QChar candidateChar = candidateNormalized[ candidateIdx++ ];
438 bool isCandidateCharWordEnd = candidateChar ==
' ' || candidateChar.isPunct();
441 if ( candidateIdx == 1 )
444 else if ( isCandidateCharWordEnd )
451 if ( searchIdx >= searchLength )
454 QChar searchChar = searchNormalized[ searchIdx ];
455 bool isSearchCharWordEnd = searchChar ==
' ' || searchChar.isPunct();
458 if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
463 if ( isSearchCharWordEnd )
467 else if ( isPreviousIndexMatching )
475 else if ( isPreviousIndexMatching )
485 isPreviousIndexMatching =
true;
490 isPreviousIndexMatching =
false;
495 if ( searchIdx >= searchLength )
497 bool isEndOfWord = ( candidateIdx >= candidateLength )
499 : candidateNormalized[candidateIdx] ==
' ' || candidateNormalized[candidateIdx].isPunct();
510 if ( searchIdx < searchLength )
513 return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
519 QString converted = string;
523 const thread_local QRegularExpression urlRegEx( QStringLiteral(
"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))" ) );
524 const thread_local QRegularExpression protoRegEx( QStringLiteral(
"^(?:f|ht)tps?://|file://" ) );
525 const thread_local QRegularExpression emailRegEx( QStringLiteral(
"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)" ) );
529 QRegularExpressionMatch match = urlRegEx.match( converted );
530 while ( match.hasMatch() )
533 QString url = match.captured( 1 );
534 QString protoUrl = url;
535 if ( !protoRegEx.match( protoUrl ).hasMatch() )
537 protoUrl.prepend(
"http://" );
539 QString anchor = QStringLiteral(
"<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
540 converted.replace( match.capturedStart( 1 ), url.length(), anchor );
541 offset = match.capturedStart( 1 ) + anchor.length();
542 match = urlRegEx.match( converted, offset );
546 match = emailRegEx.match( converted );
547 while ( match.hasMatch() )
550 QString email = match.captured( 1 );
551 QString anchor = QStringLiteral(
"<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
552 converted.replace( match.capturedStart( 1 ), email.length(), anchor );
553 offset = match.capturedStart( 1 ) + anchor.length();
554 match = emailRegEx.match( converted, offset );
565 const thread_local QRegularExpression rxUrl( QStringLiteral(
"^(http|https|ftp|file)://\\S+$" ) );
566 return rxUrl.match(
string ).hasMatch();
572 QString converted = html;
573 converted.replace( QLatin1String(
"<br>" ), QLatin1String(
"\n" ) );
574 converted.replace( QLatin1String(
"<b>" ), QLatin1String(
"**" ) );
575 converted.replace( QLatin1String(
"</b>" ), QLatin1String(
"**" ) );
576 converted.replace( QLatin1String(
"<pre>" ), QLatin1String(
"\n```\n" ) );
577 converted.replace( QLatin1String(
"</pre>" ), QLatin1String(
"```\n" ) );
579 const thread_local QRegularExpression hrefRegEx( QStringLiteral(
"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" ) );
582 QRegularExpressionMatch match = hrefRegEx.match( converted );
583 while ( match.hasMatch() )
585 QString url = match.captured( 1 ).replace( QLatin1String(
"\"" ), QString() );
586 url.replace(
'\'', QString() );
587 QString name = match.captured( 2 );
588 QString anchor = QStringLiteral(
"[%1](%2)" ).arg( name, url );
589 converted.replace( match.capturedStart(), match.capturedLength(), anchor );
590 offset = match.capturedStart() + anchor.length();
591 match = hrefRegEx.match( converted, offset );
597QString
QgsStringUtils::wordWrap(
const QString &
string,
const int length,
const bool useMaxLineLength,
const QString &customDelimiter )
599 if (
string.isEmpty() || length == 0 )
603 QRegularExpression rx;
604 int delimiterLength = 0;
606 if ( !customDelimiter.isEmpty() )
608 rx.setPattern( QRegularExpression::escape( customDelimiter ) );
609 delimiterLength = customDelimiter.length();
614 rx.setPattern( QStringLiteral(
"[\\x{200B}\\s]" ) );
618 const QStringList lines =
string.split(
'\n' );
619 int strLength, strCurrent, strHit, lastHit;
621 for (
int i = 0; i < lines.size(); i++ )
623 const QString line = lines.at( i );
624 strLength = line.length();
625 if ( strLength <= length )
628 newstr.append( line );
629 if ( i < lines.size() - 1 )
630 newstr.append(
'\n' );
637 while ( strCurrent < strLength )
641 if ( useMaxLineLength )
644 strHit = ( strCurrent + length >= strLength ) ? -1 : line.lastIndexOf( rx, strCurrent + length );
645 if ( strHit == lastHit || strHit == -1 )
648 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
654 strHit = ( strCurrent + std::abs( length ) >= strLength ) ? -1 : line.indexOf( rx, strCurrent + std::abs( length ) );
658 newstr.append( QStringView {line} .mid( strCurrent, strHit - strCurrent ) );
659 newstr.append(
'\n' );
660 strCurrent = strHit + delimiterLength;
664 newstr.append( QStringView {line} .mid( strCurrent ) );
665 strCurrent = strLength;
668 if ( i < lines.size() - 1 )
669 newstr.append(
'\n' );
677 string =
string.replace(
',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) );
678 string =
string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) );
679 string =
string.replace(
':', QChar( 65043 ) ).replace(
';', QChar( 65044 ) );
680 string =
string.replace(
'!', QChar( 65045 ) ).replace(
'?', QChar( 65046 ) );
681 string =
string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) );
682 string =
string.replace( QChar( 8230 ), QChar( 65049 ) );
683 string =
string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) );
684 string =
string.replace(
'_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) );
685 string =
string.replace(
'(', QChar( 65077 ) ).replace(
')', QChar( 65078 ) );
686 string =
string.replace(
'{', QChar( 65079 ) ).replace(
'}', QChar( 65080 ) );
687 string =
string.replace(
'<', QChar( 65087 ) ).replace(
'>', QChar( 65088 ) );
688 string =
string.replace(
'[', QChar( 65095 ) ).replace(
']', QChar( 65096 ) );
689 string =
string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) );
690 string =
string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) );
691 string =
string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) );
692 string =
string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) );
693 string =
string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) );
700 const QLatin1Char backslash(
'\\' );
701 const int count =
string.count();
704 escaped.reserve( count * 2 );
705 for (
int i = 0; i < count; i++ )
707 switch (
string.at( i ).toLatin1() )
723 escaped.append( backslash );
725 escaped.append(
string.at( i ) );
732 const int charactersToTruncate =
string.length() - maxLength;
733 if ( charactersToTruncate <= 0 )
737 const int truncateFrom =
string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
738 if ( truncateFrom <= 0 )
739 return QChar( 0x2026 );
741#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
742 return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) +
string.midRef( truncateFrom + charactersToTruncate + 1 );
744 return QStringView(
string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView(
string ).sliced( truncateFrom + charactersToTruncate + 1 );
750 if ( candidate.trimmed().isEmpty() )
753 const thread_local QRegularExpression rxWhitespace( QStringLiteral(
"\\s+" ) );
754 const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );
757 for (
const QString &word : parts )
759 if ( !candidate.contains( word, sensitivity ) )
767 , mReplacement( replacement )
768 , mCaseSensitive( caseSensitive )
769 , mWholeWordOnly( wholeWordOnly )
771 if ( mWholeWordOnly )
773 mRx.setPattern( QStringLiteral(
"\\b%1\\b" ).arg( mMatch ) );
774 mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
780 QString result = input;
781 if ( !mWholeWordOnly )
783 return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
787 return result.replace( mRx, mReplacement );
794 map.insert( QStringLiteral(
"match" ), mMatch );
795 map.insert( QStringLiteral(
"replace" ), mReplacement );
796 map.insert( QStringLiteral(
"caseSensitive" ), mCaseSensitive ? QStringLiteral(
"1" ) : QStringLiteral(
"0" ) );
797 map.insert( QStringLiteral(
"wholeWord" ), mWholeWordOnly ? QStringLiteral(
"1" ) : QStringLiteral(
"0" ) );
804 properties.value( QStringLiteral(
"replace" ) ),
805 properties.value( QStringLiteral(
"caseSensitive" ), QStringLiteral(
"0" ) ) == QLatin1String(
"1" ),
806 properties.value( QStringLiteral(
"wholeWord" ), QStringLiteral(
"0" ) ) == QLatin1String(
"1" ) );
811 QString result = input;
814 result = r.process( result );
824 QDomElement propEl = doc.createElement( QStringLiteral(
"replacement" ) );
825 QgsStringMap::const_iterator it = props.constBegin();
826 for ( ; it != props.constEnd(); ++it )
828 propEl.setAttribute( it.key(), it.value() );
830 elem.appendChild( propEl );
836 mReplacements.clear();
837 QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral(
"replacement" ) );
838 for (
int i = 0; i < nodelist.count(); i++ )
840 QDomElement replacementElem = nodelist.at( i ).toElement();
841 QDomNamedNodeMap nodeMap = replacementElem.attributes();
844 for (
int j = 0; j < nodeMap.count(); ++j )
846 props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
Capitalization
String capitalization options.
@ AllSmallCaps
Force all characters to small caps.
@ MixedCase
Mixed case, i.e. no change.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ SmallCaps
Mixed case small caps.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
@ AllUppercase
Convert all characters to uppercase.
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http, https, ftp, file).
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
QMap< QString, QString > QgsStringMap
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH