api/qgsstringutils_8cpp_source.html

/***************************************************************************

    qgsstringutils.cpp

    ------------------

    begin                : June 2015

    copyright            : (C) 2015 by Nyall Dawson

    email                : nyall dot dawson at gmail dot com

 ***************************************************************************

 *                                                                         *

 *   This program is free software; you can redistribute it and/or modify  *

 *   it under the terms of the GNU General Public License as published by  *

 *   the Free Software Foundation; either version 2 of the License, or     *

 *   (at your option) any later version.                                   *

 *                                                                         *

 ***************************************************************************/


#include "qgsstringutils.h"


#include <cstdlib>


#include "qgslogger.h"


#include <QRegularExpression>

#include <QString>

#include <QStringList>

#include <QTextBoundaryFinder>

#include <QVector>


using namespace Qt::StringLiterals;


// Definition of the static lookup table consulted by unaccent(); it is
// initialised from the result of createUnaccentMap().
QHash<QString, QString> QgsStringUtils::UNACCENT_MAP = QgsStringUtils::createUnaccentMap();


// Returns a copy of \a input in which every code point that has an entry in
// UNACCENT_MAP is replaced by the mapped text. Code points without an entry
// are copied through unchanged.
QString QgsStringUtils::unaccent( const QString &input )
{
  // Compose the input (NFC) first, so that "base + combining mark" sequences
  // collapse into single composed code points. The keys of UNACCENT_MAP are
  // stored in NFC (e.g. "e" + U+0301 becomes "é", as PostgreSQL does it), so
  // lookups only match after this step.
  const QString composed = input.normalized( QString::NormalizationForm_C );
  const qsizetype total = composed.size();

  QString result;
  result.reserve( total );

  for ( qsizetype pos = 0; pos < total; )
  {
    // A high surrogate directly followed by a low surrogate is one non-BMP
    // code point and has to be looked up as a two-unit key.
    const bool isSurrogatePair = composed.at( pos ).isHighSurrogate()
                                 && pos + 1 < total
                                 && composed.at( pos + 1 ).isLowSurrogate();
    const int unitCount = isSurrogatePair ? 2 : 1;

    const QString key = composed.mid( pos, unitCount ).normalized( QString::NormalizationForm_C );
    const auto found = UNACCENT_MAP.constFind( key );
    result.append( found == UNACCENT_MAP.constEnd() ? key : found.value() );

    pos += unitCount;
  }

  return result;
}


// Applies the requested capitalization rule to \a string and returns the
// converted copy. An empty input always yields an empty string.
//
// MixedCase and SmallCaps return the text unchanged, AllSmallCaps lower-cases
// it like AllLowercase; the remaining modes are described inline below.
QString QgsStringUtils::capitalize( const QString &string, Qgis::Capitalization capitalization )
{
  if ( string.isEmpty() )
    return QString();

  switch ( capitalization )
  {
    case Qgis::Capitalization::MixedCase:
    case Qgis::Capitalization::SmallCaps:
      return string;

    case Qgis::Capitalization::AllUppercase:
      return string.toUpper();

    case Qgis::Capitalization::AllLowercase:
    case Qgis::Capitalization::AllSmallCaps:
      return string.toLower();

    case Qgis::Capitalization::ForceFirstLetterToCapital:
    {
      QString temp = string;

      // Both finders operate on the untouched input, so every position they
      // report is an offset into `string`, not into `temp`.
      QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
      QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );

      // Upper-casing a grapheme may change its length in UTF-16 units (special
      // cased characters such as "ß"). `shift` accumulates that drift so that
      // later replacements still land on the right place inside `temp`.
      qsizetype shift = 0;

      wordSplitter.setPosition( 0 );
      bool first = true;
      while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem ) || wordSplitter.toNextBoundary() >= 0 )
      {
        first = false;
        const qsizetype boundary = wordSplitter.position();

        // the first grapheme following the boundary is the one to upper-case
        letterSplitter.setPosition( boundary );
        ( void ) letterSplitter.toNextBoundary();
        const QString substr = string.mid( boundary, letterSplitter.position() - boundary );
        const QString upper = substr.toUpper();

        temp.replace( boundary + shift, substr.length(), upper );
        shift += upper.length() - substr.length();
      }
      return temp;
    }

    case Qgis::Capitalization::TitleCase:
    {
      // yes, this is MASSIVELY simplifying the problem!!

      // Initialised on first use. Function-local statics are initialised in a
      // thread-safe manner, and the regular expression is kept per thread like
      // the other regular expressions in this file.
      static const QStringList smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
      static const QStringList newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
      const thread_local QRegularExpression splitWords( u"\\b"_s, QRegularExpression::UseUnicodePropertiesOption );

      // Text which is entirely upper or entirely lower case carries no casing
      // information worth keeping, so it is lower-cased before processing.
      const bool allSameCase = string.toLower() == string || string.toUpper() == string;
      const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );

      QString result;
      bool firstWord = true;
      qsizetype i = 0;
      const qsizetype lastWord = parts.count() - 1;
      for ( const QString &word : parts )
      {
        if ( newPhraseSeparators.contains( word.trimmed() ) )
        {
          // a separator starts a new phrase: the next word is capitalized
          firstWord = true;
          result += word;
        }
        else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
        {
          // parts are never empty (Qt::SkipEmptyParts), so at( 0 ) is safe
          result += word.at( 0 ).toUpper() + word.mid( 1 );
          firstWord = false;
        }
        else
        {
          // small word in the middle of a phrase, left as is
          result += word;
        }
        i++;
      }
      return result;
    }

    case Qgis::Capitalization::UpperCamelCase:
    {
      // capitalize each word of the lower-cased text, then drop the spaces
      QString result = QgsStringUtils::capitalize( string.toLower(), Qgis::Capitalization::ForceFirstLetterToCapital ).simplified();
      result.remove( ' ' );
      return result;
    }
  }
  // no warnings
  return string;
}


// original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding


// Encodes \a string for inclusion in HTML/XML text: '&', '<' and '>' become
// named entities and every character above U+00A0 becomes a decimal numeric
// character reference. All other characters are copied unchanged.
QString QgsStringUtils::ampersandEncode( const QString &string )
{
  const qsizetype size = string.size();

  QString encoded;
  encoded.reserve( size );

  for ( qsizetype i = 0; i < size; ++i )
  {
    const QChar ch = string.at( i );

    if ( ch.isHighSurrogate() && i + 1 < size && string.at( i + 1 ).isLowSurrogate() )
    {
      // A surrogate pair encodes a single non-BMP code point. Emitting each
      // half as its own reference would produce references to surrogate code
      // points, which are not valid characters in HTML/XML, so the pair is
      // combined into one reference and the low surrogate is skipped.
      const char32_t codePoint = QChar::surrogateToUcs4( ch, string.at( i + 1 ) );
      encoded += u"&#%1;"_s.arg( static_cast< uint >( codePoint ) );
      ++i;
    }
    else if ( ch.unicode() > 160 )
      encoded += u"&#%1;"_s.arg( static_cast< int >( ch.unicode() ) );
    else if ( ch.unicode() == 38 ) // '&'
      encoded += "&amp;"_L1;
    else if ( ch.unicode() == 60 ) // '<'
      encoded += "&lt;"_L1;
    else if ( ch.unicode() == 62 ) // '>'
      encoded += "&gt;"_L1;
    else
      encoded += ch;
  }
  return encoded;
}


// Returns the Levenshtein edit distance (insertions, deletions and
// substitutions of single UTF-16 units) between \a string1 and \a string2.
// When \a caseSensitive is false both strings are lower-cased first.
int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
{
  int length1 = string1.length();
  int length2 = string2.length();

  //empty strings? solution is trivial...
  if ( string1.isEmpty() )
  {
    return length2;
  }
  else if ( string2.isEmpty() )
  {
    return length1;
  }

  //handle case sensitive flag (or not)
  QString s1( caseSensitive ? string1 : string1.toLower() );
  QString s2( caseSensitive ? string2 : string2.toLower() );

  const QChar *s1Char = s1.constData();
  const QChar *s2Char = s2.constData();

  //strip out any common prefix
  int commonPrefixLen = 0;
  while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
  {
    commonPrefixLen++;
    length1--;
    length2--;
    s1Char++;
    s2Char++;
  }

  //strip out any common suffix
  while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
  {
    length1--;
    length2--;
  }

  //fully checked either string? if so, the answer is easy...
  if ( length1 == 0 )
  {
    return length2;
  }
  else if ( length2 == 0 )
  {
    return length1;
  }

  //ensure the inner loop is longer
  if ( length1 > length2 )
  {
    std::swap( s1, s2 );
    std::swap( length1, length2 );
    // The character cursors must follow their strings. Without this the short
    // length would be walked over the long string and the long length over the
    // short one, reading past the remaining (or even the allocated) characters.
    std::swap( s1Char, s2Char );
  }

  //levenshtein algorithm begins here
  // Only two columns of the dynamic programming matrix are kept: prevCol holds
  // the distances for the previous character of s1, col the ones being built.
  std::vector< int > col( length2 + 1, 0 );
  std::vector< int > prevCol;
  prevCol.reserve( length2 + 1 );
  for ( int i = 0; i < length2 + 1; ++i )
  {
    prevCol.emplace_back( i );
  }
  const QChar *s2start = s2Char;
  for ( int i = 0; i < length1; ++i )
  {
    col[0] = i + 1;
    s2Char = s2start;
    for ( int j = 0; j < length2; ++j )
    {
      // minimum of: insertion, deletion, substitution (free when equal)
      col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
      s2Char++;
    }
    col.swap( prevCol );
    s1Char++;
  }
  return prevCol[length2];
}


// Returns the longest run of consecutive characters which occurs in both
// \a string1 and \a string2. When \a caseSensitive is false the comparison is
// done on lower-cased copies, and the returned text is cut out of \a string1.
QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
{
  if ( string1.isEmpty() || string2.isEmpty() )
  {
    //empty strings, solution is trivial...
    return QString();
  }

  //handle case sensitive flag (or not)
  QString s1( caseSensitive ? string1 : string1.toLower() );
  QString s2( caseSensitive ? string2 : string2.toLower() );

  if ( s1 == s2 )
  {
    //another trivial case, identical strings
    // NOTE(review): this returns the (possibly lower-cased) comparison copy,
    // whereas the general path below returns a slice of string1 - confirm
    // whether that difference is intended.
    return s1;
  }

  // Score rows of the dynamic programming table. The vectors own their memory,
  // so nothing has to be released by hand on any return path.
  std::vector< int > currentScores( s2.length() );
  std::vector< int > previousScores( s2.length() );
  int maxCommonLength = 0;
  int lastMaxBeginIndex = 0;

  const QChar *s1Char = s1.constData();
  const QChar *s2Char = s2.constData();
  const QChar *s2Start = s2Char;

  for ( int i = 0; i < s1.length(); ++i )
  {
    for ( int j = 0; j < s2.length(); ++j )
    {
      if ( *s1Char != *s2Char )
      {
        currentScores[j] = 0;
      }
      else
      {
        // length of the common run ending at s1[i] / s2[j]
        if ( i == 0 || j == 0 )
        {
          currentScores[j] = 1;
        }
        else
        {
          currentScores[j] = 1 + previousScores[j - 1];
        }

        if ( maxCommonLength < currentScores[j] )
        {
          maxCommonLength = currentScores[j];
          // index in s1 of the LAST character of the best run so far
          lastMaxBeginIndex = i;
        }
      }
      s2Char++;
    }
    // constant time: only the internal buffers are exchanged
    std::swap( currentScores, previousScores );
    s1Char++;
    s2Char = s2Start;
  }
  return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
}


int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )

{

  if ( string1.isEmpty() && string2.isEmpty() )

  {

    //empty strings, solution is trivial...

    return 0;

  }


  if ( string1.length() != string2.length() )

  {

    //invalid inputs

    return -1;

  }


  //handle case sensitive flag (or not)

  QString s1( caseSensitive ? string1 : string1.toLower() );

  QString s2( caseSensitive ? string2 : string2.toLower() );


  if ( s1 == s2 )

  {

    //another trivial case, identical strings

    return 0;

  }


  int distance = 0;

  const QChar *s1Char = s1.constData();

  const QChar *s2Char = s2.constData();


  for ( int i = 0; i < string1.length(); ++i )

  {

    if ( *s1Char != *s2Char )

      distance++;

    s1Char++;

    s2Char++;

  }


  return distance;

}


// Builds a Soundex-style code for \a string: the first retained letter
// followed by digits for the following consonants, without adjacent
// duplicates, limited to four characters and padded with '0'.
QString QgsStringUtils::soundex( const QString &string )
{
  if ( string.isEmpty() )
    return QString();

  const QString upper = string.toUpper();

  // Step 1: keep only the letters A-Z. The vowel-like letters
  // A, E, H, I, O, U, W and Y survive only at the very first input position.
  QString kept;
  kept.reserve( upper.length() );
  for ( int idx = 0; idx < upper.length(); ++idx )
  {
    const QChar letter = upper.at( idx );
    const ushort code = letter.unicode();
    if ( code < 0x41 || code > 0x5A )
      continue;

    bool vowelLike = false;
    switch ( code )
    {
      case 0x41: // A
      case 0x45: // E
      case 0x48: // H
      case 0x49: // I
      case 0x4F: // O
      case 0x55: // U
      case 0x57: // W
      case 0x59: // Y
        vowelLike = true;
        break;
      default:
        break;
    }

    if ( idx == 0 || !vowelLike )
      kept.append( letter );
  }

  // Step 2: translation of a consonant into its digit; anything else is
  // passed through unchanged.
  const auto digitFor = []( QChar letter ) -> QChar
  {
    switch ( letter.unicode() )
    {
      case 0x42: // B
      case 0x46: // F
      case 0x50: // P
      case 0x56: // V
        return QChar( 0x31 );

      case 0x43: // C
      case 0x47: // G
      case 0x4A: // J
      case 0x4B: // K
      case 0x51: // Q
      case 0x53: // S
      case 0x58: // X
      case 0x5A: // Z
        return QChar( 0x32 );

      case 0x44: // D
      case 0x54: // T
        return QChar( 0x33 );

      case 0x4C: // L
        return QChar( 0x34 );

      case 0x4D: // M
      case 0x4E: // N
        return QChar( 0x35 );

      case 0x52: // R
        return QChar( 0x36 );

      default:
        return letter;
    }
  };

  // Step 3: the leading character is taken over as is, every following one is
  // translated and appended unless it repeats the character written last.
  QString result;
  if ( !kept.isEmpty() )
  {
    result.append( kept.at( 0 ) );
    for ( int idx = 1; idx < kept.length() && result.length() < 4; ++idx )
    {
      const QChar digit = digitFor( kept.at( idx ) );
      if ( digit != result.back() )
        result.append( digit );
    }
  }

  // Step 4: pad short codes with zeros and cut back to four characters
  if ( result.length() < 4 )
  {
    result.append( "000" );
    result.truncate( 4 );
  }

  return result;
}


double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )

{

  QString candidateNormalized = candidate.simplified().normalized( QString::NormalizationForm_C ).toLower();

  QString searchNormalized = search.simplified().normalized( QString::NormalizationForm_C ).toLower();


  int candidateLength = candidateNormalized.length();

  int searchLength = searchNormalized.length();

  int score = 0;


  // if the candidate and the search term are empty, no other option than 0 score

  if ( candidateLength == 0 || searchLength == 0 )

    return score;


  int candidateIdx = 0;

  int searchIdx = 0;

  // there is always at least one word

  int maxScore = FUZZY_SCORE_WORD_MATCH;


  bool isPreviousIndexMatching = false;

  bool isWordOpen = true;


  // loop trough each candidate char and calculate the potential max score

  while ( candidateIdx < candidateLength )

  {

    QChar candidateChar = candidateNormalized[candidateIdx++];

    bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();


    // the first char is always the default score

    if ( candidateIdx == 1 )

      maxScore += FUZZY_SCORE_NEW_MATCH;

    // every space character or underscore is a opportunity for a new word

    else if ( isCandidateCharWordEnd )

      maxScore += FUZZY_SCORE_WORD_MATCH;

    // potentially we can match every other character

    else

      maxScore += FUZZY_SCORE_CONSECUTIVE_MATCH;


    // we looped through all the characters

    if ( searchIdx >= searchLength )

      continue;


    QChar searchChar = searchNormalized[searchIdx];

    bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();


    // match!

    if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )

    {

      searchIdx++;


      // if we have just successfully finished a word, give higher score

      if ( isSearchCharWordEnd )

      {

        if ( isWordOpen )

          score += FUZZY_SCORE_WORD_MATCH;

        else if ( isPreviousIndexMatching )

          score += FUZZY_SCORE_CONSECUTIVE_MATCH;

        else

          score += FUZZY_SCORE_NEW_MATCH;


        isWordOpen = true;

      }

      // if we have consecutive characters matching, give higher score

      else if ( isPreviousIndexMatching )

      {

        score += FUZZY_SCORE_CONSECUTIVE_MATCH;

      }

      // normal score for new independent character that matches

      else

      {

        score += FUZZY_SCORE_NEW_MATCH;

      }


      isPreviousIndexMatching = true;

    }

    // if the current character does NOT match, we are sure we cannot build a word for now

    else

    {

      isPreviousIndexMatching = false;

      isWordOpen = false;

    }


    // if the search string is covered, check if the last match is end of word

    if ( searchIdx >= searchLength )

    {

      bool isEndOfWord = ( candidateIdx >= candidateLength ) ? true : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();


      if ( isEndOfWord )

        score += FUZZY_SCORE_WORD_MATCH;

    }


    // QgsLogger::debug( u"TMP: %1 | %2 | %3 | %4 | %5"_s.arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );

  }


  // QgsLogger::debug( u"RES: %1 | %2"_s.arg( QString::number(maxScore),  QString::number(score) ) + QStringLiteral( __FILE__ ) );

  // we didn't loop through all the search chars, it means, that they are not present in the current candidate

  if ( searchIdx < searchLength )

    score = 0;


  return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );

}


// Wraps URLs and e-mail addresses found in \a string into HTML anchors and
// returns the converted text. URLs without a recognised protocol get
// "http://" prepended in the href. If \a foundLinks is not null it receives
// whether at least one link was inserted.
QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
{
  // http://alanstorm.com/url_regex_explained
  // note - there's more robust implementations available
  const thread_local QRegularExpression urlRegEx(
    u"((?:(?:http|https|ftp|file)://[^\\s]+[^\\s,.]+)|(?:\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~\\s]|/)))))"_s
  );
  const thread_local QRegularExpression protoRegEx( u"^(?:f|ht)tps?://|file://"_s );
  const thread_local QRegularExpression emailRegEx( u"([\\w._%+-]+@[\\w.-]+\\.[A-Za-z]+)"_s );

  QString converted = string;
  bool anyLink = false;

  // Pass 1: URLs. After each replacement the search resumes right behind the
  // inserted anchor, so the anchor text itself is never matched again.
  for ( QRegularExpressionMatch urlMatch = urlRegEx.match( converted ); urlMatch.hasMatch(); )
  {
    anyLink = true;

    const QString url = urlMatch.captured( 1 );
    QString href = url;
    if ( !protoRegEx.match( href ).hasMatch() )
      href.prepend( "http://" );

    const int start = urlMatch.capturedStart( 1 );
    const QString anchor = u"<a href=\"%1\">%2</a>"_s.arg( href.toHtmlEscaped(), url.toHtmlEscaped() );
    converted.replace( start, url.length(), anchor );

    urlMatch = urlRegEx.match( converted, start + anchor.length() );
  }

  // Pass 2: e-mail addresses, handled the same way on the converted text
  for ( QRegularExpressionMatch emailMatch = emailRegEx.match( converted ); emailMatch.hasMatch(); )
  {
    anyLink = true;

    const QString email = emailMatch.captured( 1 );
    const int start = emailMatch.capturedStart( 1 );
    const QString anchor = u"<a href=\"mailto:%1\">%1</a>"_s.arg( email.toHtmlEscaped() );
    converted.replace( start, email.length(), anchor );

    emailMatch = emailRegEx.match( converted, start + anchor.length() );
  }

  if ( foundLinks )
    *foundLinks = anyLink;

  return converted;
}


bool QgsStringUtils::isUrl( const QString &string )

{

  const thread_local QRegularExpression rxUrl( u"^(http|https|ftp|file)://\\S+$"_s );

  return rxUrl.match( string ).hasMatch();

}


// Converts a small subset of HTML (<br>, <b>, <pre> and <a href=...>) found in
// \a html to the corresponding markdown and returns the result.
QString QgsStringUtils::htmlToMarkdown( const QString &html )
{
  // Any changes in this function must be copied to qgscrashreport.cpp too
  const struct
  {
    QLatin1StringView tag;
    QLatin1StringView markdown;
  } substitutions[] = {
    { "<br>"_L1, "\n"_L1 },
    { "<b>"_L1, "**"_L1 },
    { "</b>"_L1, "**"_L1 },
    { "<pre>"_L1, "\n```\n"_L1 },
    { "</pre>"_L1, "```\n"_L1 },
  };

  QString converted = html;
  for ( const auto &substitution : substitutions )
    converted.replace( substitution.tag, substitution.markdown );

  const thread_local QRegularExpression hrefRegEx( u"<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>"_s );

  // Rewrite every anchor as [text](target); the search continues right after
  // the markdown link which has just been inserted.
  for ( QRegularExpressionMatch anchorMatch = hrefRegEx.match( converted ); anchorMatch.hasMatch(); )
  {
    // the href value may be quoted with either kind of quote: strip them all
    QString target = anchorMatch.captured( 1 );
    target.remove( QLatin1Char( '"' ) );
    target.remove( QLatin1Char( '\'' ) );

    const QString label = anchorMatch.captured( 2 );
    const QString link = u"[%1](%2)"_s.arg( label, target );

    const int start = anchorMatch.capturedStart();
    converted.replace( start, anchorMatch.capturedLength(), link );

    anchorMatch = hrefRegEx.match( converted, start + link.length() );
  }

  return converted;
}


// Wraps \a string by replacing delimiters with new line characters. Existing
// lines (separated by '\n') are wrapped independently. \a length is the wrap
// width; \a useMaxLineLength selects between breaking at the last delimiter at
// or before that width and breaking at the first delimiter at or after it.
// The delimiter is \a customDelimiter, or whitespace when that is empty.
QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
{
  if ( string.isEmpty() || length == 0 )
    return string;

  const bool hasCustomDelimiter = !customDelimiter.isEmpty();

  // \x{200B} is a ZERO-WIDTH SPACE, needed for word wrap to support a number
  // of complex scripts (Indic, Arabic, etc.)
  const QRegularExpression rx( hasCustomDelimiter ? QRegularExpression::escape( customDelimiter ) : u"[\\x{200B}\\s]"_s );
  const int delimiterLength = hasCustomDelimiter ? customDelimiter.length() : 1;
  const int width = std::abs( length );

  // first delimiter located at least `width` characters after `from`, or -1
  const auto findForward = [&rx, width]( const QString &text, int from ) -> int
  {
    if ( from + width >= text.length() )
      return -1;
    return text.indexOf( rx, from + width );
  };

  QStringList wrappedLines;
  const QStringList lines = string.split( '\n' );
  wrappedLines.reserve( lines.size() );

  for ( const QString &line : lines )
  {
    const int lineLength = line.length();
    if ( lineLength <= length )
    {
      // shortcut, no wrapping required
      wrappedLines.append( line );
      continue;
    }

    QString wrapped;
    int current = 0;
    int hit = 0;
    int previousHit = 0;

    while ( current < lineLength )
    {
      if ( useMaxLineLength )
      {
        // first try to locate a delimiter backwards from the wrap width
        hit = ( current + length >= lineLength ) ? -1 : line.lastIndexOf( rx, current + length );
        if ( hit == previousHit || hit == -1 )
        {
          // if no new backward delimiter was found, try to locate one forward
          hit = findForward( line, current );
        }
        previousHit = hit;
      }
      else
      {
        hit = findForward( line, current );
      }

      if ( hit > -1 )
      {
        // break here: the delimiter itself is replaced by the new line
        wrapped.append( QStringView { line }.mid( current, hit - current ) );
        wrapped.append( '\n' );
        current = hit + delimiterLength;
      }
      else
      {
        // no further break point, take over the remainder of the line
        wrapped.append( QStringView { line }.mid( current ) );
        current = lineLength;
      }
    }

    wrappedLines.append( wrapped );
  }

  // the original line breaks are restored between the processed lines
  return wrappedLines.join( '\n' );
}


// Returns \a string with a fixed set of punctuation characters and brackets
// replaced by the code points listed in the table below.
QString QgsStringUtils::substituteVerticalCharacters( QString string )
{
  static constexpr struct
  {
    char16_t horizontal;
    char16_t vertical;
  } SUBSTITUTIONS[] = {
    { u',', 65040 },   // comma
    { 8229, 65072 },   // two-dot leader
    { 12289, 65041 },  // ideographic comma
    { 12290, 65042 },  // ideographic full stop
    { u':', 65043 },
    { u';', 65044 },
    { u'!', 65045 },
    { u'?', 65046 },
    { 12310, 65047 },  // white lenticular brackets
    { 12311, 65048 },
    { 8230, 65049 },   // three-dot ellipse
    { 8212, 65073 },   // em dash
    { 8211, 65074 },   // en dash
    { u'_', 65075 },   // low line
    { 65103, 65076 },  // wavy low line
    { u'(', 65077 },
    { u')', 65078 },
    { u'{', 65079 },
    { u'}', 65080 },
    { u'<', 65087 },
    { u'>', 65088 },
    { u'[', 65095 },
    { u']', 65096 },
    { 12308, 65081 },  // tortoise shell brackets
    { 12309, 65082 },
    { 12304, 65083 },  // black lenticular brackets
    { 12305, 65084 },
    { 12298, 65085 },  // double angle brackets
    { 12299, 65086 },
    { 12300, 65089 },  // corner brackets
    { 12301, 65090 },
    { 12302, 65091 },  // white corner brackets
    { 12303, 65092 },
  };

  for ( const auto &substitution : SUBSTITUTIONS )
    string.replace( QChar( substitution.horizontal ), QChar( substitution.vertical ) );

  return string;
}


// Returns a copy of \a string in which each of the characters
// $ ( ) * + . ? [ \ ] ^ { | } is preceded by a backslash.
QString QgsStringUtils::qRegExpEscape( const QString &string )
{
  // code and logic taken from the Qt source code
  const auto needsEscaping = []( QChar ch ) -> bool
  {
    switch ( ch.toLatin1() )
    {
      case '$':
      case '(':
      case ')':
      case '*':
      case '+':
      case '.':
      case '?':
      case '[':
      case '\\':
      case ']':
      case '^':
      case '{':
      case '|':
      case '}':
        return true;
      default:
        return false;
    }
  };

  QString escaped;
  // worst case: every single character needs a backslash
  escaped.reserve( string.length() * 2 );

  for ( const QChar ch : string )
  {
    if ( needsEscaping( ch ) )
      escaped.append( QLatin1Char( '\\' ) );
    escaped.append( ch );
  }
  return escaped;
}


// Shortens \a string to at most \a maxLength characters by cutting characters
// out of its middle and putting a single ellipsis character (U+2026) in their
// place. Strings which already fit are returned unchanged.
QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
{
  const int excess = string.length() - maxLength;
  if ( excess <= 0 )
    return string;

  const QChar ellipsis( 0x2026 );

  // note we actually truncate an extra character, as we'll be replacing it
  // with the ellipsis character
  const int headLength = string.length() / 2 - ( excess + 1 ) / 2;
  if ( headLength <= 0 )
    return QString( ellipsis );

  const int tailStart = headLength + excess + 1;
  return string.left( headLength ) + ellipsis + string.mid( tailStart );
}


bool QgsStringUtils::containsByWord( const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity )

{

  if ( candidate.trimmed().isEmpty() )

    return false;


  const thread_local QRegularExpression rxWhitespace( u"\\s+"_s );

  const QStringList parts = words.split( rxWhitespace, Qt::SkipEmptyParts );

  if ( parts.empty() )

    return false;

  for ( const QString &word : parts )

  {

    if ( !candidate.contains( word, sensitivity ) )

      return false;

  }

  return true;

}


// Creates a replacement of \a match by \a replacement. \a caseSensitive and
// \a wholeWordOnly control how occurrences of the match text are located.
QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
  : mMatch( match )
  , mReplacement( replacement )
  , mCaseSensitive( caseSensitive )
  , mWholeWordOnly( wholeWordOnly )
{
  if ( mWholeWordOnly )
  {
    // The match text is literal text, exactly as it is treated by process()
    // when whole word matching is off. It therefore has to be escaped before
    // being embedded between the word boundary assertions; otherwise
    // characters such as '.', '+' or '(' would be interpreted as regular
    // expression syntax and could even make the pattern invalid.
    mRx.setPattern( u"\\b%1\\b"_s.arg( QRegularExpression::escape( mMatch ) ) );
    mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
  }
}


// Returns a copy of \a input with the occurrences of the match text replaced.
// Whole word replacements go through the prepared regular expression, all
// others use a plain text replacement honouring the case sensitivity flag.
QString QgsStringReplacement::process( const QString &input ) const
{
  QString result = input;
  if ( mWholeWordOnly )
  {
    result.replace( mRx, mReplacement );
  }
  else
  {
    const Qt::CaseSensitivity sensitivity = mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive;
    result.replace( mMatch, mReplacement, sensitivity );
  }
  return result;
}


// Encodes this replacement as a string map with the keys "match", "replace",
// "caseSensitive" and "wholeWord"; the two flags are stored as "1" or "0".
QgsStringMap QgsStringReplacement::properties() const
{
  const auto flagText = []( bool flag ) { return flag ? u"1"_s : u"0"_s; };

  QgsStringMap encoded;
  encoded[u"match"_s] = mMatch;
  encoded[u"replace"_s] = mReplacement;
  encoded[u"caseSensitive"_s] = flagText( mCaseSensitive );
  encoded[u"wholeWord"_s] = flagText( mWholeWordOnly );
  return encoded;
}


// Rebuilds a replacement from a map as produced by properties(). Missing
// flags default to "0", and a flag is only set when its value is exactly "1".
QgsStringReplacement QgsStringReplacement::fromProperties( const QgsStringMap &properties )
{
  const QString match = properties.value( u"match"_s );
  const QString replacement = properties.value( u"replace"_s );
  const bool caseSensitive = properties.value( u"caseSensitive"_s, u"0"_s ) == "1"_L1;
  const bool wholeWordOnly = properties.value( u"wholeWord"_s, u"0"_s ) == "1"_L1;

  return QgsStringReplacement( match, replacement, caseSensitive, wholeWordOnly );
}


// Applies every replacement of the collection to \a input, in order, each one
// working on the output of the previous one, and returns the final text.
QString QgsStringReplacementCollection::process( const QString &input ) const
{
  QString current = input;
  for ( auto it = mReplacements.cbegin(); it != mReplacements.cend(); ++it )
    current = it->process( current );
  return current;
}


// Appends one "replacement" child element per replacement to \a elem. The
// properties of a replacement are written as attributes of its element;
// \a doc is used to create the elements.
void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
{
  for ( const QgsStringReplacement &replacement : mReplacements )
  {
    QDomElement replacementElem = doc.createElement( u"replacement"_s );

    const QgsStringMap props = replacement.properties();
    QgsStringMap::const_iterator prop = props.constBegin();
    while ( prop != props.constEnd() )
    {
      replacementElem.setAttribute( prop.key(), prop.value() );
      ++prop;
    }

    elem.appendChild( replacementElem );
  }
}


void QgsStringReplacementCollection::readXml( const QDomElement &elem )

{

  mReplacements.clear();

  QDomNodeList nodelist = elem.elementsByTagName( u"replacement"_s );

  for ( int i = 0; i < nodelist.count(); i++ )

  {

    QDomElement replacementElem = nodelist.at( i ).toElement();

    QDomNamedNodeMap nodeMap = replacementElem.attributes();


    QgsStringMap props;

    for ( int j = 0; j < nodeMap.count(); ++j )

    {

      props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );

    }

    mReplacements << QgsStringReplacement::fromProperties( props );

  }

}


Qgis::Capitalization
Capitalization
String capitalization options.
Definition qgis.h:3503

Qgis::Capitalization::AllSmallCaps
@ AllSmallCaps
Force all characters to small caps.
Definition qgis.h:3511

Qgis::Capitalization::MixedCase
@ MixedCase
Mixed case, ie no change.
Definition qgis.h:3504

Qgis::Capitalization::UpperCamelCase
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
Definition qgis.h:3510

Qgis::Capitalization::AllLowercase
@ AllLowercase
Convert all characters to lowercase.
Definition qgis.h:3506

Qgis::Capitalization::TitleCase
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
Definition qgis.h:3509

Qgis::Capitalization::SmallCaps
@ SmallCaps
Mixed case small caps.
Definition qgis.h:3508

Qgis::Capitalization::ForceFirstLetterToCapital
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
Definition qgis.h:3507

Qgis::Capitalization::AllUppercase
@ AllUppercase
Convert all characters to uppercase.
Definition qgis.h:3505

QgsStringReplacementCollection::readXml
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
Definition qgsstringutils.cpp:874

QgsStringReplacementCollection::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
Definition qgsstringutils.cpp:849

QgsStringReplacementCollection::writeXml
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
Definition qgsstringutils.cpp:859

QgsStringReplacement
A representation of a single string replacement.
Definition qgsstringutils.h:39

QgsStringReplacement::fromProperties
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
Definition qgsstringutils.cpp:844

QgsStringReplacement::process
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
Definition qgsstringutils.cpp:821

QgsStringReplacement::wholeWordOnly
bool wholeWordOnly() const
Returns true if match only applies to whole words, or false if partial word matches are permitted.
Definition qgsstringutils.h:60

QgsStringReplacement::replacement
QString replacement() const
Returns the string to replace matches with.
Definition qgsstringutils.h:54

QgsStringReplacement::caseSensitive
bool caseSensitive() const
Returns true if match is case sensitive.
Definition qgsstringutils.h:57

QgsStringReplacement::QgsStringReplacement
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
Definition qgsstringutils.cpp:808

QgsStringReplacement::match
QString match() const
Returns the string matched by this object.
Definition qgsstringutils.h:51

QgsStringReplacement::properties
QgsStringMap properties() const
Returns a map of the replacement properties.
Definition qgsstringutils.cpp:834

QgsStringUtils::hammingDistance
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
Definition qgsstringutils.cpp:325

QgsStringUtils::soundex
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
Definition qgsstringutils.cpp:364

QgsStringUtils::UNACCENT_MAP
static QHash< QString, QString > UNACCENT_MAP
Lookup table used by unaccent().
Definition qgsstringutils.h:342

QgsStringUtils::levenshteinDistance
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
Definition qgsstringutils.cpp:181

QgsStringUtils::htmlToMarkdown
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
Definition qgsstringutils.cpp:616

QgsStringUtils::longestCommonSubstring
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
Definition qgsstringutils.cpp:262

QgsStringUtils::capitalize
static QString capitalize(const QString &string, Qgis::Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
Definition qgsstringutils.cpp:72

QgsStringUtils::substituteVerticalCharacters
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
Definition qgsstringutils.cpp:722

QgsStringUtils::unaccent
static QString unaccent(const QString &input)
Removes accents and other diacritical marks from a string, replacing accented characters with their u...
Definition qgsstringutils.cpp:32

QgsStringUtils::containsByWord
static bool containsByWord(const QString &candidate, const QString &words, Qt::CaseSensitivity sensitivity=Qt::CaseInsensitive)
Given a candidate string, returns true if the candidate contains all the individual words from anothe...
Definition qgsstringutils.cpp:791

QgsStringUtils::insertLinks
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
Definition qgsstringutils.cpp:562

QgsStringUtils::fuzzyScore
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
Definition qgsstringutils.cpp:460

QgsStringUtils::qRegExpEscape
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
Definition qgsstringutils.cpp:744

QgsStringUtils::ampersandEncode
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
Definition qgsstringutils.cpp:161

QgsStringUtils::wordWrap
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
Definition qgsstringutils.cpp:644

QgsStringUtils::isUrl
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file).
Definition qgsstringutils.cpp:610

QgsStringUtils::truncateMiddleOfString
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
Definition qgsstringutils.cpp:777

QgsStringUtils::createUnaccentMap
static QHash< QString, QString > createUnaccentMap()
Generates the unaccent mapping table (auto-generated by script at build time).
Definition qgsunaccentrules.cpp:37

c
As part of the API refactoring and improvements which landed in the Processing API was substantially reworked from the x version This was done in order to allow much of the underlying Processing framework to be ported into c
Definition porting_processing.dox:1

QgsStringMap
QMap< QString, QString > QgsStringMap
Definition qgis.h:7475

qgslogger.h

qgsstringutils.h

FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_CONSECUTIVE_MATCH
Definition qgsstringutils.h:29

FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_WORD_MATCH
Definition qgsstringutils.h:27

FUZZY_SCORE_NEW_MATCH
#define FUZZY_SCORE_NEW_MATCH
Definition qgsstringutils.h:28