File indexing completed on 2024-05-12 04:02:20

0001 /*
0002     SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
0003     SPDX-FileCopyrightText: 2018 Christoph Cullmann <cullmann@kde.org>
0004     SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen+kde@gmail.com>
0005 
0006     SPDX-License-Identifier: MIT
0007 */
0008 
0009 #include "context_p.h"
0010 #include "definition_p.h"
0011 #include "dynamicregexpcache_p.h"
0012 #include "ksyntaxhighlighting_logging.h"
0013 #include "rule_p.h"
0014 #include "worddelimiters_p.h"
0015 #include "xml_p.h"
0016 
0017 using namespace KSyntaxHighlighting;
0018 
0019 // QChar::isDigit() match any digit in unicode (romain numeral, etc)
0020 static bool isDigit(QChar c)
0021 {
0022     return (c <= QLatin1Char('9') && QLatin1Char('0') <= c);
0023 }
0024 
0025 static bool isOctalChar(QChar c)
0026 {
0027     return (c <= QLatin1Char('7') && QLatin1Char('0') <= c);
0028 }
0029 
0030 static bool isHexChar(QChar c)
0031 {
0032     return isDigit(c) || (c <= QLatin1Char('f') && QLatin1Char('a') <= c) || (c <= QLatin1Char('F') && QLatin1Char('A') <= c);
0033 }
0034 
0035 static int matchEscapedChar(QStringView text, int offset)
0036 {
0037     if (text.at(offset) != QLatin1Char('\\') || text.size() < offset + 2) {
0038         return offset;
0039     }
0040 
0041     const auto c = text.at(offset + 1);
0042     switch (c.unicode()) {
0043     // control chars
0044     case 'a':
0045     case 'b':
0046     case 'e':
0047     case 'f':
0048     case 'n':
0049     case 'r':
0050     case 't':
0051     case 'v':
0052     case '"':
0053     case '\'':
0054     case '?':
0055     case '\\':
0056         return offset + 2;
0057 
0058     // hex encoded character
0059     case 'x':
0060         if (offset + 2 < text.size() && isHexChar(text.at(offset + 2))) {
0061             if (offset + 3 < text.size() && isHexChar(text.at(offset + 3))) {
0062                 return offset + 4;
0063             }
0064             return offset + 3;
0065         }
0066         return offset;
0067 
0068     // octal encoding, simple \0 is OK, too, unlike simple \x above
0069     case '0':
0070     case '1':
0071     case '2':
0072     case '3':
0073     case '4':
0074     case '5':
0075     case '6':
0076     case '7':
0077         if (offset + 2 < text.size() && isOctalChar(text.at(offset + 2))) {
0078             if (offset + 3 < text.size() && isOctalChar(text.at(offset + 3))) {
0079                 return offset + 4;
0080             }
0081             return offset + 3;
0082         }
0083         return offset + 2;
0084     }
0085 
0086     return offset;
0087 }
0088 
0089 static QString replaceCaptures(const QString &pattern, const QStringList &captures, bool quote)
0090 {
0091     auto result = pattern;
0092     for (int i = captures.size(); i >= 1; --i) {
0093         result.replace(QLatin1Char('%') + QString::number(i), quote ? QRegularExpression::escape(captures.at(i - 1)) : captures.at(i - 1));
0094     }
0095     return result;
0096 }
0097 
0098 static MatchResult matchString(QStringView pattern, QStringView text, int offset, Qt::CaseSensitivity caseSensitivity)
0099 {
0100     if (offset + pattern.size() <= text.size() && text.mid(offset, pattern.size()).compare(pattern, caseSensitivity) == 0) {
0101         return offset + pattern.size();
0102     }
0103     return offset;
0104 }
0105 
0106 static void resolveAdditionalWordDelimiters(WordDelimiters &wordDelimiters, const HighlightingContextData::Rule::WordDelimiters &delimiters)
0107 {
0108     // cache for DefinitionData::wordDelimiters, is accessed VERY often
0109     if (!delimiters.additionalDeliminator.isEmpty() || !delimiters.weakDeliminator.isEmpty()) {
0110         wordDelimiters.append(QStringView(delimiters.additionalDeliminator));
0111         wordDelimiters.remove(QStringView(delimiters.weakDeliminator));
0112     }
0113 }
0114 
// Destructor defined out of line in this translation unit; the compiler-generated behavior is used.
Rule::~Rule() = default;
0116 
0117 const IncludeRules *Rule::castToIncludeRules() const
0118 {
0119     if (m_type != Type::IncludeRules) {
0120         return nullptr;
0121     }
0122     return static_cast<const IncludeRules *>(this);
0123 }
0124 
0125 bool Rule::resolveCommon(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName)
0126 {
0127     switch (ruleData.type) {
0128     // IncludeRules uses this with a different semantic
0129     case HighlightingContextData::Rule::Type::IncludeRules:
0130         m_type = Type::IncludeRules;
0131         return true;
0132     case HighlightingContextData::Rule::Type::LineContinue:
0133         m_type = Type::LineContinue;
0134         break;
0135     default:
0136         m_type = Type::OtherRule;
0137         break;
0138     }
0139 
0140     /**
0141      * try to get our format from the definition we stem from
0142      */
0143     if (!ruleData.common.attributeName.isEmpty()) {
0144         m_attributeFormat = def.formatByName(ruleData.common.attributeName);
0145         if (!m_attributeFormat.isValid()) {
0146             qCWarning(Log) << "Rule: Unknown format" << ruleData.common.attributeName << "in context" << lookupContextName << "of definition" << def.name;
0147         }
0148     }
0149 
0150     m_firstNonSpace = ruleData.common.firstNonSpace;
0151     m_lookAhead = ruleData.common.lookAhead;
0152     m_column = ruleData.common.column;
0153 
0154     if (!ruleData.common.beginRegionName.isEmpty()) {
0155         m_beginRegion = FoldingRegion(FoldingRegion::Begin, def.foldingRegionId(ruleData.common.beginRegionName));
0156     }
0157     if (!ruleData.common.endRegionName.isEmpty()) {
0158         m_endRegion = FoldingRegion(FoldingRegion::End, def.foldingRegionId(ruleData.common.endRegionName));
0159     }
0160 
0161     m_context.resolve(def, ruleData.common.contextName);
0162 
0163     return !(m_lookAhead && m_context.isStay());
0164 }
0165 
0166 static Rule::Ptr createRule(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName)
0167 {
0168     using Type = HighlightingContextData::Rule::Type;
0169 
0170     switch (ruleData.type) {
0171     case Type::AnyChar:
0172         return std::make_shared<AnyChar>(ruleData.data.anyChar);
0173     case Type::DetectChar:
0174         return std::make_shared<DetectChar>(ruleData.data.detectChar);
0175     case Type::Detect2Chars:
0176         return std::make_shared<Detect2Chars>(ruleData.data.detect2Chars);
0177     case Type::IncludeRules:
0178         return std::make_shared<IncludeRules>(ruleData.data.includeRules);
0179     case Type::Int:
0180         return std::make_shared<Int>(def, ruleData.data.detectInt);
0181     case Type::Keyword:
0182         return KeywordListRule::create(def, ruleData.data.keyword, lookupContextName);
0183     case Type::LineContinue:
0184         return std::make_shared<LineContinue>(ruleData.data.lineContinue);
0185     case Type::RangeDetect:
0186         return std::make_shared<RangeDetect>(ruleData.data.rangeDetect);
0187     case Type::RegExpr:
0188         if (!ruleData.data.regExpr.dynamic) {
0189             return std::make_shared<RegExpr>(ruleData.data.regExpr);
0190         } else {
0191             return std::make_shared<DynamicRegExpr>(ruleData.data.regExpr);
0192         }
0193     case Type::StringDetect:
0194         if (ruleData.data.stringDetect.dynamic) {
0195             return std::make_shared<DynamicStringDetect>(ruleData.data.stringDetect);
0196         }
0197         return std::make_shared<StringDetect>(ruleData.data.stringDetect);
0198     case Type::WordDetect:
0199         return std::make_shared<WordDetect>(def, ruleData.data.wordDetect);
0200     case Type::Float:
0201         return std::make_shared<Float>(def, ruleData.data.detectFloat);
0202     case Type::HlCOct:
0203         return std::make_shared<HlCOct>(def, ruleData.data.hlCOct);
0204     case Type::HlCStringChar:
0205         return std::make_shared<HlCStringChar>();
0206     case Type::DetectIdentifier:
0207         return std::make_shared<DetectIdentifier>();
0208     case Type::DetectSpaces:
0209         return std::make_shared<DetectSpaces>();
0210     case Type::HlCChar:
0211         return std::make_shared<HlCChar>();
0212     case Type::HlCHex:
0213         return std::make_shared<HlCHex>(def, ruleData.data.hlCHex);
0214 
0215     case Type::Unknown:;
0216     }
0217 
0218     return Rule::Ptr(nullptr);
0219 }
0220 
0221 Rule::Ptr Rule::create(DefinitionData &def, const HighlightingContextData::Rule &ruleData, QStringView lookupContextName)
0222 {
0223     auto rule = createRule(def, ruleData, lookupContextName);
0224     if (rule && !rule->resolveCommon(def, ruleData, lookupContextName)) {
0225         rule.reset();
0226     }
0227     return rule;
0228 }
0229 
/**
 * Stores the set of characters this rule accepts (data.chars).
 */
AnyChar::AnyChar(const HighlightingContextData::Rule::AnyChar &data)
    : m_chars(data.chars)
{
}
0234 
0235 MatchResult AnyChar::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0236 {
0237     if (m_chars.contains(text.at(offset))) {
0238         return offset + 1;
0239     }
0240     return offset;
0241 }
0242 
/**
 * Stores the character to detect.
 *
 * For dynamic rules the character is read as a digit N and m_captureIndex
 * becomes N - 1; for non-dynamic rules m_captureIndex is -1.
 */
DetectChar::DetectChar(const HighlightingContextData::Rule::DetectChar &data)
    : m_char(data.char1)
    // digitValue() - 1 for dynamic rules, 0 - 1 == -1 otherwise
    , m_captureIndex((data.dynamic ? data.char1.digitValue() : 0) - 1)
{
    m_dynamic = data.dynamic;
}
0249 
0250 MatchResult DetectChar::doMatch(QStringView text, int offset, const QStringList &captures, DynamicRegexpCache &) const
0251 {
0252     if (m_dynamic) {
0253         if (m_captureIndex == -1 || captures.size() <= m_captureIndex || captures.at(m_captureIndex).isEmpty()) {
0254             return offset;
0255         }
0256         if (text.at(offset) == captures.at(m_captureIndex).at(0)) {
0257             return offset + 1;
0258         }
0259         return offset;
0260     }
0261 
0262     if (text.at(offset) == m_char) {
0263         return offset + 1;
0264     }
0265     return offset;
0266 }
0267 
/**
 * Stores the two consecutive characters this rule looks for.
 */
Detect2Chars::Detect2Chars(const HighlightingContextData::Rule::Detect2Chars &data)
    : m_char1(data.char1)
    , m_char2(data.char2)
{
}
0273 
0274 MatchResult Detect2Chars::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0275 {
0276     if (text.size() - offset < 2) {
0277         return offset;
0278     }
0279     if (text.at(offset) == m_char1 && text.at(offset + 1) == m_char2) {
0280         return offset + 2;
0281     }
0282     return offset;
0283 }
0284 
0285 MatchResult DetectIdentifier::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0286 {
0287     if (!text.at(offset).isLetter() && text.at(offset) != QLatin1Char('_')) {
0288         return offset;
0289     }
0290 
0291     for (int i = offset + 1; i < text.size(); ++i) {
0292         const auto c = text.at(i);
0293         if (!c.isLetterOrNumber() && c != QLatin1Char('_')) {
0294             return i;
0295         }
0296     }
0297 
0298     return text.size();
0299 }
0300 
0301 MatchResult DetectSpaces::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0302 {
0303     while (offset < text.size() && text.at(offset).isSpace()) {
0304         ++offset;
0305     }
0306     return offset;
0307 }
0308 
/**
 * Starts from the definition's word delimiters and applies the rule specific
 * additional / weak delimiters on top.
 */
Float::Float(DefinitionData &def, const HighlightingContextData::Rule::Float &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
0314 
0315 MatchResult Float::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0316 {
0317     if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) {
0318         return offset;
0319     }
0320 
0321     auto newOffset = offset;
0322     while (newOffset < text.size() && isDigit(text.at(newOffset))) {
0323         ++newOffset;
0324     }
0325 
0326     if (newOffset >= text.size() || text.at(newOffset) != QLatin1Char('.')) {
0327         return offset;
0328     }
0329     ++newOffset;
0330 
0331     while (newOffset < text.size() && isDigit(text.at(newOffset))) {
0332         ++newOffset;
0333     }
0334 
0335     if (newOffset == offset + 1) { // we only found a decimal point
0336         return offset;
0337     }
0338 
0339     auto expOffset = newOffset;
0340     if (expOffset >= text.size() || (text.at(expOffset) != QLatin1Char('e') && text.at(expOffset) != QLatin1Char('E'))) {
0341         return newOffset;
0342     }
0343     ++expOffset;
0344 
0345     if (expOffset < text.size() && (text.at(expOffset) == QLatin1Char('+') || text.at(expOffset) == QLatin1Char('-'))) {
0346         ++expOffset;
0347     }
0348     bool foundExpDigit = false;
0349     while (expOffset < text.size() && isDigit(text.at(expOffset))) {
0350         ++expOffset;
0351         foundExpDigit = true;
0352     }
0353 
0354     if (!foundExpDigit) {
0355         return newOffset;
0356     }
0357     return expOffset;
0358 }
0359 
0360 MatchResult HlCChar::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0361 {
0362     if (text.size() < offset + 3) {
0363         return offset;
0364     }
0365 
0366     if (text.at(offset) != QLatin1Char('\'') || text.at(offset + 1) == QLatin1Char('\'')) {
0367         return offset;
0368     }
0369 
0370     auto newOffset = matchEscapedChar(text, offset + 1);
0371     if (newOffset == offset + 1) {
0372         if (text.at(newOffset) == QLatin1Char('\\')) {
0373             return offset;
0374         } else {
0375             ++newOffset;
0376         }
0377     }
0378     if (newOffset >= text.size()) {
0379         return offset;
0380     }
0381 
0382     if (text.at(newOffset) == QLatin1Char('\'')) {
0383         return newOffset + 1;
0384     }
0385 
0386     return offset;
0387 }
0388 
/**
 * Starts from the definition's word delimiters and applies the rule specific
 * additional / weak delimiters on top.
 */
HlCHex::HlCHex(DefinitionData &def, const HighlightingContextData::Rule::HlCHex &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
0394 
0395 MatchResult HlCHex::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0396 {
0397     if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) {
0398         return offset;
0399     }
0400 
0401     if (text.size() < offset + 3) {
0402         return offset;
0403     }
0404 
0405     if (text.at(offset) != QLatin1Char('0') || (text.at(offset + 1) != QLatin1Char('x') && text.at(offset + 1) != QLatin1Char('X'))) {
0406         return offset;
0407     }
0408 
0409     if (!isHexChar(text.at(offset + 2))) {
0410         return offset;
0411     }
0412 
0413     offset += 3;
0414     while (offset < text.size() && isHexChar(text.at(offset))) {
0415         ++offset;
0416     }
0417 
0418     // TODO Kate matches U/L suffix, QtC does not?
0419 
0420     return offset;
0421 }
0422 
/**
 * Starts from the definition's word delimiters and applies the rule specific
 * additional / weak delimiters on top.
 */
HlCOct::HlCOct(DefinitionData &def, const HighlightingContextData::Rule::HlCOct &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
0428 
0429 MatchResult HlCOct::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0430 {
0431     if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) {
0432         return offset;
0433     }
0434 
0435     if (text.size() < offset + 2) {
0436         return offset;
0437     }
0438 
0439     if (text.at(offset) != QLatin1Char('0')) {
0440         return offset;
0441     }
0442 
0443     if (!isOctalChar(text.at(offset + 1))) {
0444         return offset;
0445     }
0446 
0447     offset += 2;
0448     while (offset < text.size() && isOctalChar(text.at(offset))) {
0449         ++offset;
0450     }
0451 
0452     return offset;
0453 }
0454 
0455 MatchResult HlCStringChar::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0456 {
0457     return matchEscapedChar(text, offset);
0458 }
0459 
/**
 * Stores the name of the context to include and the includeAttribute setting.
 */
IncludeRules::IncludeRules(const HighlightingContextData::Rule::IncludeRules &data)
    : m_contextName(data.contextName)
    , m_includeAttribute(data.includeAttribute)
{
}
0465 
0466 MatchResult IncludeRules::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0467 {
0468     Q_UNUSED(text);
0469     qCWarning(Log) << "Unresolved include rule";
0470     return offset;
0471 }
0472 
/**
 * Starts from the definition's word delimiters and applies the rule specific
 * additional / weak delimiters on top.
 */
Int::Int(DefinitionData &def, const HighlightingContextData::Rule::Int &data)
    : m_wordDelimiters(def.wordDelimiters)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
0478 
0479 MatchResult Int::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0480 {
0481     if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1))) {
0482         return offset;
0483     }
0484 
0485     while (offset < text.size() && isDigit(text.at(offset))) {
0486         ++offset;
0487     }
0488     return offset;
0489 }
0490 
0491 Rule::Ptr KeywordListRule::create(DefinitionData &def, const HighlightingContextData::Rule::Keyword &data, QStringView lookupContextName)
0492 {
0493     /**
0494      * get our keyword list, if not found => bail out
0495      */
0496     auto *keywordList = def.keywordList(data.name);
0497     if (!keywordList) {
0498         qCWarning(Log) << "Rule: Unknown keyword list" << data.name << "in context" << lookupContextName << "of definition" << def.name;
0499         return Rule::Ptr();
0500     }
0501 
0502     if (keywordList->isEmpty()) {
0503         return Rule::Ptr();
0504     }
0505 
0506     /**
0507      * we might overwrite the case sensitivity
0508      * then we need to init the list for lookup of that sensitivity setting
0509      */
0510     if (data.hasCaseSensitivityOverride) {
0511         keywordList->initLookupForCaseSensitivity(data.caseSensitivityOverride);
0512     }
0513 
0514     return std::make_shared<KeywordListRule>(*keywordList, def, data);
0515 }
0516 
/**
 * Binds the rule to @p keywordList, picks the effective case sensitivity
 * (the override from @p data if present, otherwise the list's own) and
 * resolves the word delimiters.
 */
KeywordListRule::KeywordListRule(const KeywordList &keywordList, DefinitionData &def, const HighlightingContextData::Rule::Keyword &data)
    : m_wordDelimiters(def.wordDelimiters)
    , m_keywordList(keywordList)
    , m_caseSensitivity(data.hasCaseSensitivityOverride ? data.caseSensitivityOverride : keywordList.caseSensitivity())
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
    // doMatch() reports a skip offset on failed matches
    m_hasSkipOffset = true;
}
0525 
0526 MatchResult KeywordListRule::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0527 {
0528     auto newOffset = offset;
0529     while (text.size() > newOffset && !m_wordDelimiters.contains(text.at(newOffset))) {
0530         ++newOffset;
0531     }
0532     if (newOffset == offset) {
0533         return offset;
0534     }
0535 
0536     if (m_keywordList.contains(text.mid(offset, newOffset - offset), m_caseSensitivity)) {
0537         return newOffset;
0538     }
0539 
0540     // we don't match, but we can skip until newOffset as we can't start a keyword in-between
0541     return MatchResult(offset, newOffset);
0542 }
0543 
/**
 * Stores the line continuation character (data.char1).
 */
LineContinue::LineContinue(const HighlightingContextData::Rule::LineContinue &data)
    : m_char(data.char1)
{
}
0548 
0549 MatchResult LineContinue::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0550 {
0551     if (offset == text.size() - 1 && text.at(offset) == m_char) {
0552         return offset + 1;
0553     }
0554     return offset;
0555 }
0556 
/**
 * Stores the opening and closing character of the range.
 */
RangeDetect::RangeDetect(const HighlightingContextData::Rule::RangeDetect &data)
    : m_begin(data.begin)
    , m_end(data.end)
{
}
0562 
0563 MatchResult RangeDetect::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0564 {
0565     if (text.size() - offset < 2) {
0566         return offset;
0567     }
0568     if (text.at(offset) != m_begin) {
0569         return offset;
0570     }
0571 
0572     auto newOffset = offset + 1;
0573     while (newOffset < text.size()) {
0574         if (text.at(newOffset) == m_end) {
0575             return newOffset + 1;
0576         }
0577         ++newOffset;
0578     }
0579     return offset;
0580 }
0581 
0582 static QRegularExpression::PatternOptions makePattenOptions(const HighlightingContextData::Rule::RegExpr &data)
0583 {
0584     return (data.isMinimal ? QRegularExpression::InvertedGreedinessOption : QRegularExpression::NoPatternOption)
0585         | (data.caseSensitivity == Qt::CaseInsensitive ? QRegularExpression::CaseInsensitiveOption : QRegularExpression::NoPatternOption)
0586         // DontCaptureOption is removed by resolve() when necessary
0587         | QRegularExpression::DontCaptureOption
0588         // ensure Unicode support is enabled
0589         | QRegularExpression::UseUnicodePropertiesOption;
0590 }
0591 
0592 static void resolveRegex(QRegularExpression &regexp, Context *context)
0593 {
0594     bool enableCapture = context && context->hasDynamicRule();
0595 
0596     // disable DontCaptureOption when reference a context with dynamic rule or
0597     // with invalid regex because DontCaptureOption with back reference capture is an error
0598     if (enableCapture || !regexp.isValid()) {
0599         regexp.setPatternOptions(regexp.patternOptions() & ~QRegularExpression::DontCaptureOption);
0600     }
0601 
0602     if (!regexp.isValid()) {
0603         qCDebug(Log) << "Invalid regexp:" << regexp.pattern();
0604     }
0605 }
0606 
0607 static MatchResult regexMatch(const QRegularExpression &regexp, QStringView text, int offset)
0608 {
0609     /**
0610      * match the pattern
0611      */
0612     const auto result = regexp.matchView(text, offset, QRegularExpression::NormalMatch, QRegularExpression::DontCheckSubjectStringMatchOption);
0613     if (result.capturedStart() == offset) {
0614         /**
0615          * we only need to compute the captured texts if we have real capture groups
0616          * highlightings should only address %1..%.., see e.g. replaceCaptures
0617          * DetectChar ignores %0, too
0618          */
0619         int lastCapturedIndex = result.lastCapturedIndex();
0620         if (lastCapturedIndex > 0) {
0621             QStringList captures;
0622             captures.reserve(lastCapturedIndex);
0623             // ignore the capturing group number 0
0624             for (int i = 1; i <= lastCapturedIndex; ++i)
0625                 captures.push_back(result.captured(i));
0626             return MatchResult(offset + result.capturedLength(), std::move(captures));
0627         }
0628 
0629         /**
0630          * else: ignore the implicit 0 group we always capture, no need to allocate stuff for that
0631          */
0632         return MatchResult(offset + result.capturedLength());
0633     }
0634 
0635     /**
0636      * no match
0637      * we can always compute the skip offset as the highlighter will invalidate the cache for changed captures for dynamic rules!
0638      */
0639     return MatchResult(offset, result.capturedStart());
0640 }
0641 
/**
 * Builds the regular expression from the rule's pattern with the options
 * derived by makePattenOptions().
 */
RegExpr::RegExpr(const HighlightingContextData::Rule::RegExpr &data)
    : m_regexp(data.pattern, makePattenOptions(data))
{
    // regexMatch() reports a skip offset on failed matches
    m_hasSkipOffset = true;
}
0647 
0648 void RegExpr::resolve()
0649 {
0650     m_isResolved = true;
0651 
0652     resolveRegex(m_regexp, context().context());
0653 }
0654 
0655 MatchResult RegExpr::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0656 {
0657     if (Q_UNLIKELY(!m_isResolved)) {
0658         const_cast<RegExpr *>(this)->resolve();
0659     }
0660 
0661     return regexMatch(m_regexp, text, offset);
0662 }
0663 
/**
 * Stores the raw pattern (still containing %N placeholders) and the pattern
 * options; the actual regex is built per match in doMatch().
 */
DynamicRegExpr::DynamicRegExpr(const HighlightingContextData::Rule::RegExpr &data)
    : m_pattern(data.pattern)
    , m_patternOptions(makePattenOptions(data))
{
    m_dynamic = true;
    // regexMatch() reports a skip offset on failed matches
    m_hasSkipOffset = true;
}
0671 
0672 void DynamicRegExpr::resolve()
0673 {
0674     m_isResolved = true;
0675 
0676     QRegularExpression regexp(m_pattern, m_patternOptions);
0677     resolveRegex(regexp, context().context());
0678     m_patternOptions = regexp.patternOptions();
0679 }
0680 
0681 MatchResult DynamicRegExpr::doMatch(QStringView text, int offset, const QStringList &captures, DynamicRegexpCache &dynamicRegexpCache) const
0682 {
0683     if (Q_UNLIKELY(!m_isResolved)) {
0684         const_cast<DynamicRegExpr *>(this)->resolve();
0685     }
0686 
0687     /**
0688      * create new pattern with right instantiation
0689      */
0690     auto pattern = replaceCaptures(m_pattern, captures, true);
0691     auto &regexp = dynamicRegexpCache.compileRegexp(std::move(pattern), m_patternOptions);
0692     return regexMatch(regexp, text, offset);
0693 }
0694 
/**
 * Stores the string to detect and the case sensitivity of the comparison.
 */
StringDetect::StringDetect(const HighlightingContextData::Rule::StringDetect &data)
    : m_string(data.string)
    , m_caseSensitivity(data.caseSensitivity)
{
}
0700 
0701 MatchResult StringDetect::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0702 {
0703     return matchString(m_string, text, offset, m_caseSensitivity);
0704 }
0705 
/**
 * Stores the string template (containing %N placeholders) and the case
 * sensitivity, and flags the rule as dynamic.
 */
DynamicStringDetect::DynamicStringDetect(const HighlightingContextData::Rule::StringDetect &data)
    : m_string(data.string)
    , m_caseSensitivity(data.caseSensitivity)
{
    m_dynamic = true;
}
0712 
0713 MatchResult DynamicStringDetect::doMatch(QStringView text, int offset, const QStringList &captures, DynamicRegexpCache &) const
0714 {
0715     /**
0716      * for dynamic case: create new pattern with right instantiation
0717      */
0718     const auto pattern = replaceCaptures(m_string, captures, false);
0719     return matchString(pattern, text, offset, m_caseSensitivity);
0720 }
0721 
/**
 * Stores the word to detect and the case sensitivity, and resolves the word
 * delimiters (definition defaults plus rule specific changes).
 */
WordDetect::WordDetect(DefinitionData &def, const HighlightingContextData::Rule::WordDetect &data)
    : m_wordDelimiters(def.wordDelimiters)
    , m_word(data.word)
    , m_caseSensitivity(data.caseSensitivity)
{
    resolveAdditionalWordDelimiters(m_wordDelimiters, data.wordDelimiters);
}
0729 
0730 MatchResult WordDetect::doMatch(QStringView text, int offset, const QStringList &, DynamicRegexpCache &) const
0731 {
0732     if (text.size() - offset < m_word.size()) {
0733         return offset;
0734     }
0735 
0736     /**
0737      * detect delimiter characters on the inner and outer boundaries of the string
0738      * NOTE: m_word isn't empty
0739      */
0740     if (offset > 0 && !m_wordDelimiters.contains(text.at(offset - 1)) && !m_wordDelimiters.contains(text.at(offset))) {
0741         return offset;
0742     }
0743 
0744     if (text.mid(offset, m_word.size()).compare(m_word, m_caseSensitivity) != 0) {
0745         return offset;
0746     }
0747 
0748     if (text.size() == offset + m_word.size() || m_wordDelimiters.contains(text.at(offset + m_word.size()))
0749         || m_wordDelimiters.contains(text.at(offset + m_word.size() - 1))) {
0750         return offset + m_word.size();
0751     }
0752 
0753     return offset;
0754 }