File indexing completed on 2024-05-12 15:50:01

0001 /*
0002     SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
0003     SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
0004 
0005     SPDX-License-Identifier: MIT
0006 */
0007 
0008 #include <QCborValue>
0009 #include <QCoreApplication>
0010 #include <QDebug>
0011 #include <QFile>
0012 #include <QFileInfo>
0013 #include <QMutableMapIterator>
0014 #include <QRegularExpression>
0015 #include <QVariant>
0016 #include <QXmlStreamReader>
0017 
0018 #ifdef QT_XMLPATTERNS_LIB
0019 #include <QXmlSchema>
0020 #include <QXmlSchemaValidator>
0021 #endif
0022 
0023 #include "../lib/worddelimiters_p.h"
0024 #include "../lib/xml_p.h"
0025 
0026 #include <array>
0027 
0028 using KSyntaxHighlighting::WordDelimiters;
0029 using KSyntaxHighlighting::Xml::attrToBool;
0030 
0031 class HlFilesChecker
0032 {
0033 public:
0034     template<typename T>
0035     void setDefinition(const T &verStr, const QString &filename, const QString &name)
0036     {
0037         m_currentDefinition = &*m_definitions.insert(name, Definition{});
0038         m_currentDefinition->languageName = name;
0039         m_currentDefinition->filename = filename;
0040         m_currentDefinition->kateVersionStr = verStr.toString();
0041         m_currentKeywords = nullptr;
0042         m_currentContext = nullptr;
0043 
0044         const auto idx = verStr.indexOf(QLatin1Char('.'));
0045         if (idx <= 0) {
0046             qWarning() << filename << "invalid kateversion" << verStr;
0047             m_success = false;
0048         } else {
0049             m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()};
0050         }
0051     }
0052 
0053     void processElement(QXmlStreamReader &xml)
0054     {
0055         if (xml.isStartElement()) {
0056             if (m_currentContext) {
0057                 m_currentContext->rules.push_back(Context::Rule{});
0058                 auto &rule = m_currentContext->rules.back();
0059                 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
0060                 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
0061             } else if (m_currentKeywords) {
0062                 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success;
0063             } else if (xml.name() == QStringLiteral("context")) {
0064                 processContextElement(xml);
0065             } else if (xml.name() == QStringLiteral("list")) {
0066                 processListElement(xml);
0067             } else if (xml.name() == QStringLiteral("keywords")) {
0068                 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
0069             } else if (xml.name() == QStringLiteral("emptyLine")) {
0070                 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
0071             } else if (xml.name() == QStringLiteral("itemData")) {
0072                 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
0073             }
0074         } else if (xml.isEndElement()) {
0075             if (m_currentContext && xml.name() == QStringLiteral("context")) {
0076                 m_currentContext = nullptr;
0077             } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) {
0078                 m_currentKeywords = nullptr;
0079             }
0080         }
0081     }
0082 
0083     //! Resolve context attribute and include tag
0084     void resolveContexts()
0085     {
0086         QMutableMapIterator<QString, Definition> def(m_definitions);
0087         while (def.hasNext()) {
0088             def.next();
0089             auto &definition = def.value();
0090             auto &contexts = definition.contexts;
0091 
0092             if (contexts.isEmpty()) {
0093                 qWarning() << definition.filename << "has no context";
0094                 m_success = false;
0095                 continue;
0096             }
0097 
0098             auto markAsUsedContext = [](ContextName &contextName) {
0099                 if (!contextName.stay && contextName.context) {
0100                     contextName.context->isOnlyIncluded = false;
0101                 }
0102             };
0103 
0104             QMutableMapIterator<QString, Context> contextIt(contexts);
0105             while (contextIt.hasNext()) {
0106                 contextIt.next();
0107                 auto &context = contextIt.value();
0108                 resolveContextName(definition, context, context.lineEndContext, context.line);
0109                 resolveContextName(definition, context, context.lineEmptyContext, context.line);
0110                 resolveContextName(definition, context, context.fallthroughContext, context.line);
0111                 markAsUsedContext(context.lineEndContext);
0112                 markAsUsedContext(context.lineEmptyContext);
0113                 markAsUsedContext(context.fallthroughContext);
0114                 for (auto &rule : context.rules) {
0115                     rule.parentContext = &context;
0116                     resolveContextName(definition, context, rule.context, rule.line);
0117                     if (rule.type != Context::Rule::Type::IncludeRules) {
0118                         markAsUsedContext(rule.context);
0119                     } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
0120                         rule.context.context->referencedWithIncludeAttrib = true;
0121                     }
0122                 }
0123             }
0124 
0125             auto *firstContext = &*definition.contexts.find(definition.firstContextName);
0126             firstContext->isOnlyIncluded = false;
0127             definition.firstContext = firstContext;
0128         }
0129 
0130         resolveIncludeRules();
0131     }
0132 
0133     bool check() const
0134     {
0135         bool success = m_success;
0136 
0137         const auto usedContexts = extractUsedContexts();
0138 
0139         QMap<const Definition *, const Definition *> maxVersionByDefinitions;
0140         QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
0141 
0142         QMapIterator<QString, Definition> def(m_definitions);
0143         while (def.hasNext()) {
0144             def.next();
0145             const auto &definition = def.value();
0146             const auto &filename = definition.filename;
0147 
0148             auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
0149             if (maxDef != &definition) {
0150                 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
0151                            << ". Please, increase kateversion.";
0152                 success = false;
0153             }
0154 
0155             QSet<const Keywords *> referencedKeywords;
0156             QSet<ItemDatas::Style> usedAttributeNames;
0157             QSet<ItemDatas::Style> ignoredAttributeNames;
0158             success = checkKeywordsList(definition, referencedKeywords) && success;
0159             success =
0160                 checkContexts(definition, referencedKeywords, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
0161 
0162             // search for non-existing itemDatas.
0163             const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
0164             for (const auto &styleName : invalidNames) {
0165                 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
0166                 success = false;
0167             }
0168 
0169             // search for existing itemDatas, but unusable.
0170             const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
0171             for (const auto &styleName : ignoredNames) {
0172                 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
0173                            << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
0174                 success = false;
0175             }
0176 
0177             // search for unused itemDatas.
0178             auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
0179             unusedNames -= ignoredNames;
0180             for (const auto &styleName : std::as_const(unusedNames)) {
0181                 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
0182                 success = false;
0183             }
0184         }
0185 
0186         QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
0187         while (unreachableIncludedRuleIt.hasNext()) {
0188             unreachableIncludedRuleIt.next();
0189             IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
0190             if (unreachableRulesBy.alwaysUnreachable) {
0191                 auto *rule = unreachableIncludedRuleIt.key();
0192 
0193                 if (!rule->parentContext->isOnlyIncluded) {
0194                     continue;
0195                 }
0196 
0197                 // remove duplicates rules
0198                 QSet<const Context::Rule *> rules;
0199                 auto &unreachableBy = unreachableRulesBy.unreachableBy;
0200                 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
0201                                                    unreachableBy.end(),
0202                                                    [&](const RuleAndInclude &ruleAndInclude) {
0203                                                        if (rules.contains(ruleAndInclude.rule)) {
0204                                                            return true;
0205                                                        }
0206                                                        rules.insert(ruleAndInclude.rule);
0207                                                        return false;
0208                                                    }),
0209                                     unreachableBy.end());
0210 
0211                 QString message;
0212                 message.reserve(128);
0213                 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
0214                     message += QStringLiteral("line ");
0215                     message += QString::number(ruleAndInclude.rule->line);
0216                     message += QStringLiteral(" [");
0217                     message += ruleAndInclude.rule->parentContext->name;
0218                     if (rule->filename != ruleAndInclude.rule->filename) {
0219                         message += QStringLiteral(" (");
0220                         message += ruleAndInclude.rule->filename;
0221                         message += QLatin1Char(')');
0222                     }
0223                     if (ruleAndInclude.includeRules) {
0224                         message += QStringLiteral(" via line ");
0225                         message += QString::number(ruleAndInclude.includeRules->line);
0226                     }
0227                     message += QStringLiteral("], ");
0228                 }
0229                 message.chop(2);
0230 
0231                 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
0232                 success = false;
0233             }
0234         }
0235 
0236         return success;
0237     }
0238 
0239 private:
0240     enum class XmlBool {
0241         Unspecified,
0242         False,
0243         True,
0244     };
0245 
0246     struct Context;
0247 
0248     struct ContextName {
0249         QString name;
0250         int popCount = 0;
0251         bool stay = false;
0252 
0253         Context *context = nullptr;
0254     };
0255 
0256     struct Parser {
0257         const QString &filename;
0258         QXmlStreamReader &xml;
0259         QXmlStreamAttribute &attr;
0260         bool success;
0261 
0262         //! Read a string type attribute, \c success = \c false when \p str is not empty
0263         //! \return \c true when attr.name() == attrName, otherwise false
0264         bool extractString(QString &str, const QString &attrName)
0265         {
0266             if (attr.name() != attrName) {
0267                 return false;
0268             }
0269 
0270             str = attr.value().toString();
0271             if (str.isEmpty()) {
0272                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
0273                 success = false;
0274             }
0275 
0276             return true;
0277         }
0278 
0279         //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
0280         //! \return \c true when attr.name() == attrName, otherwise false
0281         bool extractXmlBool(XmlBool &xmlBool, const QString &attrName)
0282         {
0283             if (attr.name() != attrName) {
0284                 return false;
0285             }
0286 
0287             xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
0288 
0289             return true;
0290         }
0291 
0292         //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
0293         //! \return \c true when attr.name() == attrName, otherwise false
0294         bool extractPositive(int &positive, const QString &attrName)
0295         {
0296             if (attr.name() != attrName) {
0297                 return false;
0298             }
0299 
0300             bool ok = true;
0301             positive = attr.value().toInt(&ok);
0302 
0303             if (!ok || positive < 0) {
0304                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
0305                 success = false;
0306             }
0307 
0308             return true;
0309         }
0310 
0311         //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
0312         //! \return \c true when attr.name() == attrName, otherwise false
0313         bool checkColor(const QString &attrName)
0314         {
0315             if (attr.name() != attrName) {
0316                 return false;
0317             }
0318 
0319             const auto value = attr.value();
0320             if (value.isEmpty() /*|| QColor(value).isValid()*/) {
0321                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
0322                 success = false;
0323             }
0324 
0325             return true;
0326         }
0327 
0328         //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
0329         //! \return \c true when attr.name() == attrName, otherwise false
0330         bool extractChar(QChar &c, const QString &attrName)
0331         {
0332             if (attr.name() != attrName) {
0333                 return false;
0334             }
0335 
0336             if (attr.value().size() == 1) {
0337                 c = attr.value()[0];
0338             } else {
0339                 c = QLatin1Char('_');
0340                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
0341                 success = false;
0342             }
0343 
0344             return true;
0345         }
0346 
0347         //! \return parsing status when \p isExtracted is \c true, otherwise \c false
0348         bool checkIfExtracted(bool isExtracted)
0349         {
0350             if (isExtracted) {
0351                 return success;
0352             }
0353 
0354             qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
0355             return false;
0356         }
0357     };
0358 
0359     struct Keywords {
0360         struct Items {
0361             struct Item {
0362                 QString content;
0363                 int line;
0364 
0365                 friend uint qHash(const Item &item, uint seed = 0)
0366                 {
0367                     return qHash(item.content, seed);
0368                 }
0369 
0370                 friend bool operator==(const Item &item0, const Item &item1)
0371                 {
0372                     return item0.content == item1.content;
0373                 }
0374             };
0375 
0376             QVector<Item> keywords;
0377             QSet<Item> includes;
0378 
0379             bool parseElement(const QString &filename, QXmlStreamReader &xml)
0380             {
0381                 bool success = true;
0382 
0383                 const int line = xml.lineNumber();
0384                 QString content = xml.readElementText();
0385 
0386                 if (content.isEmpty()) {
0387                     qWarning() << filename << "line" << line << "is empty:" << xml.name();
0388                     success = false;
0389                 }
0390 
0391                 if (xml.name() == QStringLiteral("include")) {
0392                     includes.insert({content, line});
0393                 } else if (xml.name() == QStringLiteral("item")) {
0394                     keywords.append({content, line});
0395                 } else {
0396                     qWarning() << filename << "line" << line << "invalid element:" << xml.name();
0397                     success = false;
0398                 }
0399 
0400                 return success;
0401             }
0402         };
0403 
0404         QString name;
0405         Items items;
0406         int line;
0407 
0408         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0409         {
0410             line = xml.lineNumber();
0411 
0412             bool success = true;
0413             for (auto &attr : xml.attributes()) {
0414                 Parser parser{filename, xml, attr, success};
0415 
0416                 const bool isExtracted = parser.extractString(name, QStringLiteral("name"));
0417 
0418                 success = parser.checkIfExtracted(isExtracted);
0419             }
0420             return success;
0421         }
0422     };
0423 
0424     struct Context {
0425         struct Rule {
0426             enum class Type {
0427                 Unknown,
0428                 AnyChar,
0429                 Detect2Chars,
0430                 DetectChar,
0431                 DetectIdentifier,
0432                 DetectSpaces,
0433                 Float,
0434                 HlCChar,
0435                 HlCHex,
0436                 HlCOct,
0437                 HlCStringChar,
0438                 IncludeRules,
0439                 Int,
0440                 LineContinue,
0441                 RangeDetect,
0442                 RegExpr,
0443                 StringDetect,
0444                 WordDetect,
0445                 keyword,
0446             };
0447 
0448             Type type{};
0449 
0450             bool isDotRegex = false;
0451             int line = -1;
0452 
0453             // commonAttributes
0454             QString attribute;
0455             ContextName context;
0456             QString beginRegion;
0457             QString endRegion;
0458             int column = -1;
0459             XmlBool lookAhead{};
0460             XmlBool firstNonSpace{};
0461 
0462             // StringDetect, WordDetect, keyword
0463             XmlBool insensitive{};
0464 
0465             // DetectChar, StringDetect, RegExpr, keyword
0466             XmlBool dynamic{};
0467 
0468             // Regex
0469             XmlBool minimal{};
0470 
0471             // IncludeRule
0472             XmlBool includeAttrib{};
0473 
0474             // DetectChar, Detect2Chars, LineContinue, RangeDetect
0475             QChar char0;
0476             // Detect2Chars, RangeDetect
0477             QChar char1;
0478 
0479             // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword
0480             QString string;
0481             // RegExpr without .* as suffix
0482             QString sanitizedString;
0483 
0484             // Float, HlCHex, HlCOct, Int, WordDetect, keyword
0485             QString additionalDeliminator;
0486             QString weakDeliminator;
0487 
0488             // rules included by IncludeRules (without IncludeRule)
0489             QVector<const Rule *> includedRules;
0490 
0491             // IncludeRules included by IncludeRules
0492             QSet<const Rule *> includedIncludeRules;
0493 
0494             Context const *parentContext = nullptr;
0495 
0496             QString filename;
0497 
0498             bool parseElement(const QString &filename, QXmlStreamReader &xml)
0499             {
0500                 this->filename = filename;
0501                 line = xml.lineNumber();
0502 
0503                 using Pair = QPair<QString, Type>;
0504                 static const auto pairs = {
0505                     Pair{QStringLiteral("AnyChar"), Type::AnyChar},
0506                     Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars},
0507                     Pair{QStringLiteral("DetectChar"), Type::DetectChar},
0508                     Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier},
0509                     Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces},
0510                     Pair{QStringLiteral("Float"), Type::Float},
0511                     Pair{QStringLiteral("HlCChar"), Type::HlCChar},
0512                     Pair{QStringLiteral("HlCHex"), Type::HlCHex},
0513                     Pair{QStringLiteral("HlCOct"), Type::HlCOct},
0514                     Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar},
0515                     Pair{QStringLiteral("IncludeRules"), Type::IncludeRules},
0516                     Pair{QStringLiteral("Int"), Type::Int},
0517                     Pair{QStringLiteral("LineContinue"), Type::LineContinue},
0518                     Pair{QStringLiteral("RangeDetect"), Type::RangeDetect},
0519                     Pair{QStringLiteral("RegExpr"), Type::RegExpr},
0520                     Pair{QStringLiteral("StringDetect"), Type::StringDetect},
0521                     Pair{QStringLiteral("WordDetect"), Type::WordDetect},
0522                     Pair{QStringLiteral("keyword"), Type::keyword},
0523                 };
0524 
0525                 for (auto pair : pairs) {
0526                     if (xml.name() == pair.first) {
0527                         type = pair.second;
0528                         bool success = parseAttributes(filename, xml);
0529                         success = checkMandoryAttributes(filename, xml) && success;
0530                         if (success && type == Type::RegExpr) {
0531                             // ., (.) followed by *, +, {1} or nothing
0532                             static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
0533                             // remove "(?:" and ")"
0534                             static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))"));
0535                             // remove parentheses on a copy of string
0536                             auto reg = QString(string).replace(removeParentheses, QString());
0537                             isDotRegex = reg.contains(isDot);
0538 
0539                             // Remove .* and .*$ suffix.
0540                             static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
0541                             sanitizedString = string;
0542                             sanitizedString.replace(allSuffix, QString());
0543                             // string is a catch-all, do not sanitize
0544                             if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) {
0545                                 sanitizedString = string;
0546                             }
0547                         }
0548                         return success;
0549                     }
0550                 }
0551 
0552                 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
0553                 return false;
0554             }
0555 
0556         private:
0557             bool parseAttributes(const QString &filename, QXmlStreamReader &xml)
0558             {
0559                 bool success = true;
0560 
0561                 for (auto &attr : xml.attributes()) {
0562                     Parser parser{filename, xml, attr, success};
0563 
0564                     // clang-format off
0565                     const bool isExtracted
0566                         = parser.extractString(attribute, QStringLiteral("attribute"))
0567                        || parser.extractString(context.name, QStringLiteral("context"))
0568                        || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead"))
0569                        || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace"))
0570                        || parser.extractString(beginRegion, QStringLiteral("beginRegion"))
0571                        || parser.extractString(endRegion, QStringLiteral("endRegion"))
0572                        || parser.extractPositive(column, QStringLiteral("column"))
0573                        || ((type == Type::RegExpr
0574                          || type == Type::StringDetect
0575                          || type == Type::WordDetect
0576                          || type == Type::keyword
0577                          ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive")))
0578                        || ((type == Type::DetectChar
0579                          || type == Type::RegExpr
0580                          || type == Type::StringDetect
0581                          || type == Type::keyword
0582                          ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic")))
0583                        || ((type == Type::RegExpr)
0584                            && parser.extractXmlBool(minimal, QStringLiteral("minimal")))
0585                        || ((type == Type::DetectChar
0586                          || type == Type::Detect2Chars
0587                          || type == Type::LineContinue
0588                          || type == Type::RangeDetect
0589                          ) && parser.extractChar(char0, QStringLiteral("char")))
0590                        || ((type == Type::Detect2Chars
0591                          || type == Type::RangeDetect
0592                          ) && parser.extractChar(char1, QStringLiteral("char1")))
0593                        || ((type == Type::AnyChar
0594                          || type == Type::RegExpr
0595                          || type == Type::StringDetect
0596                          || type == Type::WordDetect
0597                          || type == Type::keyword
0598                          ) && parser.extractString(string, QStringLiteral("String")))
0599                        || ((type == Type::IncludeRules)
0600                            && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib")))
0601                        || ((type == Type::Float
0602                          || type == Type::HlCHex
0603                          || type == Type::HlCOct
0604                          || type == Type::Int
0605                          || type == Type::keyword
0606                          || type == Type::WordDetect
0607                          ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator"))
0608                             || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator"))))
0609                     ;
0610                     // clang-format on
0611 
0612                     success = parser.checkIfExtracted(isExtracted);
0613 
0614                     if (type == Type::LineContinue && char0 == QLatin1Char('\0')) {
0615                         char0 = QLatin1Char('\\');
0616                     }
0617                 }
0618 
0619                 return success;
0620             }
0621 
0622             bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml)
0623             {
0624                 QString missingAttr;
0625 
0626                 switch (type) {
0627                 case Type::Unknown:
0628                     return false;
0629 
0630                 case Type::AnyChar:
0631                 case Type::RegExpr:
0632                 case Type::StringDetect:
0633                 case Type::WordDetect:
0634                 case Type::keyword:
0635                     missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
0636                     break;
0637 
0638                 case Type::DetectChar:
0639                     missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
0640                     break;
0641 
0642                 case Type::Detect2Chars:
0643                 case Type::RangeDetect:
0644                     missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
0645                         : !char0.unicode()                             ? QStringLiteral("char")
0646                         : !char1.unicode()                             ? QStringLiteral("char1")
0647                                                                        : QString();
0648                     break;
0649 
0650                 case Type::IncludeRules:
0651                     missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
0652                     break;
0653 
0654                 case Type::DetectIdentifier:
0655                 case Type::DetectSpaces:
0656                 case Type::Float:
0657                 case Type::HlCChar:
0658                 case Type::HlCHex:
0659                 case Type::HlCOct:
0660                 case Type::HlCStringChar:
0661                 case Type::Int:
0662                 case Type::LineContinue:
0663                     break;
0664                 }
0665 
0666                 if (!missingAttr.isEmpty()) {
0667                     qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
0668                     return false;
0669                 }
0670 
0671                 return true;
0672             }
0673         };
0674 
0675         int line;
0676         // becomes false when a context (except includeRule) refers to it
0677         bool isOnlyIncluded = true;
0678         // becomes true when an includedRule refers to it with includeAttrib=true
0679         bool referencedWithIncludeAttrib = false;
0680         bool hasDynamicRule = false;
0681         QString name;
0682         QString attribute;
0683         ContextName lineEndContext;
0684         ContextName lineEmptyContext;
0685         ContextName fallthroughContext;
0686         QVector<Rule> rules;
0687         XmlBool dynamic{};
0688         XmlBool fallthrough{};
0689 
0690         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0691         {
0692             line = xml.lineNumber();
0693 
0694             bool success = true;
0695 
0696             for (auto &attr : xml.attributes()) {
0697                 Parser parser{filename, xml, attr, success};
0698                 XmlBool noIndentationBasedFolding{};
0699 
0700                 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(attribute, QStringLiteral("attribute"))
0701                     || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext"))
0702                     || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext"))
0703                     || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext"))
0704                     || parser.extractXmlBool(dynamic, QStringLiteral("dynamic")) || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough"))
0705                     || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding"));
0706 
0707                 success = parser.checkIfExtracted(isExtracted);
0708             }
0709 
0710             if (name.isEmpty()) {
0711                 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
0712                 success = false;
0713             }
0714 
0715             if (attribute.isEmpty()) {
0716                 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
0717                 success = false;
0718             }
0719 
0720             if (lineEndContext.name.isEmpty()) {
0721                 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: lineEndContext";
0722                 success = false;
0723             }
0724 
0725             return success;
0726         }
0727     };
0728 
0729     struct Version {
0730         int majorRevision;
0731         int minorRevision;
0732 
0733         Version(int majorRevision = 0, int minorRevision = 0)
0734             : majorRevision(majorRevision)
0735             , minorRevision(minorRevision)
0736         {
0737         }
0738 
0739         bool operator<(const Version &version) const
0740         {
0741             return majorRevision < version.majorRevision || (majorRevision == version.majorRevision && minorRevision < version.minorRevision);
0742         }
0743     };
0744 
0745     struct ItemDatas {
0746         struct Style {
0747             QString name;
0748             int line;
0749 
0750             friend uint qHash(const Style &style, uint seed = 0)
0751             {
0752                 return qHash(style.name, seed);
0753             }
0754 
0755             friend bool operator==(const Style &style0, const Style &style1)
0756             {
0757                 return style0.name == style1.name;
0758             }
0759         };
0760 
0761         QSet<Style> styleNames;
0762 
0763         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0764         {
0765             bool success = true;
0766 
0767             QString name;
0768             QString defStyleNum;
0769             XmlBool boolean;
0770 
0771             for (auto &attr : xml.attributes()) {
0772                 Parser parser{filename, xml, attr, success};
0773 
0774                 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum"))
0775                     || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic"))
0776                     || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut"))
0777                     || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color"))
0778                     || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor"))
0779                     || parser.checkColor(QStringLiteral("selBackgroundColor"));
0780 
0781                 success = parser.checkIfExtracted(isExtracted);
0782             }
0783 
0784             if (!name.isEmpty()) {
0785                 const auto len = styleNames.size();
0786                 styleNames.insert({name, int(xml.lineNumber())});
0787                 if (len == styleNames.size()) {
0788                     qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
0789                     success = false;
0790                 }
0791             }
0792 
0793             return success;
0794         }
0795     };
0796 
0797     struct Definition {
0798         QMap<QString, Keywords> keywordsList;
0799         QMap<QString, Context> contexts;
0800         ItemDatas itemDatas;
0801         QString firstContextName;
0802         const Context *firstContext = nullptr;
0803         QString filename;
0804         WordDelimiters wordDelimiters;
0805         XmlBool casesensitive{};
0806         Version kateVersion{};
0807         QString kateVersionStr;
0808         QString languageName;
0809         QSet<const Definition *> referencedDefinitions;
0810 
0811         // Parse <keywords ...>
0812         bool parseKeywords(QXmlStreamReader &xml)
0813         {
0814             wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator")));
0815             wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator")));
0816             return true;
0817         }
0818     };
0819 
0820     // Parse <context>
0821     void processContextElement(QXmlStreamReader &xml)
0822     {
0823         Context context;
0824         m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
0825         if (m_currentDefinition->firstContextName.isEmpty()) {
0826             m_currentDefinition->firstContextName = context.name;
0827         }
0828         if (m_currentDefinition->contexts.contains(context.name)) {
0829             qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
0830             m_success = false;
0831         }
0832         m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
0833     }
0834 
0835     // Parse <list name="...">
0836     void processListElement(QXmlStreamReader &xml)
0837     {
0838         Keywords keywords;
0839         m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
0840         if (m_currentDefinition->keywordsList.contains(keywords.name)) {
0841             qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
0842             m_success = false;
0843         }
0844         m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
0845     }
0846 
0847     const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
0848     {
0849         auto it = maxVersionByDefinitions.find(&definition);
0850         if (it != maxVersionByDefinitions.end()) {
0851             return it.value();
0852         } else {
0853             auto it = maxVersionByDefinitions.insert(&definition, &definition);
0854             for (const auto &referencedDef : definition.referencedDefinitions) {
0855                 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
0856                 if (it.value()->kateVersion < maxDef->kateVersion) {
0857                     it.value() = maxDef;
0858                 }
0859             }
0860             return it.value();
0861         }
0862     }
0863 
0864     // Initialize the referenced rules (Rule::includedRules)
0865     void resolveIncludeRules()
0866     {
0867         QSet<const Context *> usedContexts;
0868         QVector<const Context *> contexts;
0869 
0870         QMutableMapIterator<QString, Definition> def(m_definitions);
0871         while (def.hasNext()) {
0872             def.next();
0873             auto &definition = def.value();
0874             QMutableMapIterator<QString, Context> contextIt(definition.contexts);
0875             while (contextIt.hasNext()) {
0876                 contextIt.next();
0877                 auto &currentContext = contextIt.value();
0878                 for (auto &rule : currentContext.rules) {
0879                     if (rule.type != Context::Rule::Type::IncludeRules) {
0880                         continue;
0881                     }
0882 
0883                     if (rule.context.stay) {
0884                         qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
0885                         m_success = false;
0886                         continue;
0887                     }
0888 
0889                     if (rule.context.popCount) {
0890                         qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
0891                         m_success = false;
0892                     }
0893 
0894                     if (!rule.context.context) {
0895                         m_success = false;
0896                         continue;
0897                     }
0898 
0899                     // resolve includedRules and includedIncludeRules
0900 
0901                     usedContexts.clear();
0902                     usedContexts.insert(rule.context.context);
0903                     contexts.clear();
0904                     contexts.append(rule.context.context);
0905 
0906                     for (int i = 0; i < contexts.size(); ++i) {
0907                         currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
0908                         for (const auto &includedRule : contexts[i]->rules) {
0909                             if (includedRule.type != Context::Rule::Type::IncludeRules) {
0910                                 rule.includedRules.append(&includedRule);
0911                             } else if (&rule == &includedRule) {
0912                                 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
0913                                 m_success = false;
0914                             } else {
0915                                 rule.includedIncludeRules.insert(&includedRule);
0916 
0917                                 if (includedRule.includedRules.isEmpty()) {
0918                                     const auto *context = includedRule.context.context;
0919                                     if (context && !usedContexts.contains(context)) {
0920                                         contexts.append(context);
0921                                         usedContexts.insert(context);
0922                                     }
0923                                 } else {
0924                                     rule.includedRules.append(includedRule.includedRules);
0925                                 }
0926                             }
0927                         }
0928                     }
0929                 }
0930             }
0931         }
0932     }
0933 
0934     //! Recursively extracts the contexts used from the first context of the definitions.
0935     //! This method detects groups of contexts which are only used among themselves.
0936     QSet<const Context *> extractUsedContexts() const
0937     {
0938         QSet<const Context *> usedContexts;
0939         QVector<const Context *> contexts;
0940 
0941         QMapIterator<QString, Definition> def(m_definitions);
0942         while (def.hasNext()) {
0943             def.next();
0944             const auto &definition = def.value();
0945 
0946             if (definition.firstContext) {
0947                 usedContexts.insert(definition.firstContext);
0948                 contexts.clear();
0949                 contexts.append(definition.firstContext);
0950 
0951                 for (int i = 0; i < contexts.size(); ++i) {
0952                     auto appendContext = [&](const Context *context) {
0953                         if (context && !usedContexts.contains(context)) {
0954                             contexts.append(context);
0955                             usedContexts.insert(context);
0956                         }
0957                     };
0958 
0959                     const auto *context = contexts[i];
0960                     appendContext(context->lineEndContext.context);
0961                     appendContext(context->lineEmptyContext.context);
0962                     appendContext(context->fallthroughContext.context);
0963 
0964                     for (auto &rule : context->rules) {
0965                         appendContext(rule.context.context);
0966                     }
0967                 }
0968             }
0969         }
0970 
0971         return usedContexts;
0972     }
0973 
0974     struct RuleAndInclude {
0975         const Context::Rule *rule;
0976         const Context::Rule *includeRules;
0977 
0978         explicit operator bool() const
0979         {
0980             return rule;
0981         }
0982     };
0983 
0984     struct IncludedRuleUnreachableBy {
0985         QVector<RuleAndInclude> unreachableBy;
0986         bool alwaysUnreachable = true;
0987     };
0988 
0989     //! Check contexts and rules
0990     bool checkContexts(const Definition &definition,
0991                        QSet<const Keywords *> &referencedKeywords,
0992                        QSet<ItemDatas::Style> &usedAttributeNames,
0993                        QSet<ItemDatas::Style> &ignoredAttributeNames,
0994                        const QSet<const Context *> &usedContexts,
0995                        QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
0996     {
0997         bool success = true;
0998 
0999         QMapIterator<QString, Context> contextIt(definition.contexts);
1000         while (contextIt.hasNext()) {
1001             contextIt.next();
1002 
1003             const auto &context = contextIt.value();
1004             const auto &filename = definition.filename;
1005 
1006             if (!usedContexts.contains(&context)) {
1007                 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1008                 success = false;
1009                 continue;
1010             }
1011 
1012             if (context.name.startsWith(QStringLiteral("#pop"))) {
1013                 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1014                 success = false;
1015             }
1016 
1017             if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1018                 usedAttributeNames.insert({context.attribute, context.line});
1019             }
1020 
1021             success = checkfallthrough(definition, context) && success;
1022             success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1023             success = suggestRuleMerger(definition.filename, context) && success;
1024 
1025             for (const auto &rule : context.rules) {
1026                 if (!rule.attribute.isEmpty()) {
1027                     if (rule.lookAhead != XmlBool::True) {
1028                         usedAttributeNames.insert({rule.attribute, rule.line});
1029                     } else {
1030                         ignoredAttributeNames.insert({rule.attribute, rule.line});
1031                     }
1032                 }
1033                 success = checkLookAhead(rule) && success;
1034                 success = checkStringDetect(rule) && success;
1035                 success = checkKeyword(definition, rule, referencedKeywords) && success;
1036                 success = checkRegExpr(filename, rule, context) && success;
1037                 success = checkDelimiters(definition, rule) && success;
1038             }
1039         }
1040 
1041         return success;
1042     }
1043 
1044     //! Check that a regular expression in a RegExpr rule:
1045     //! - isValid()
1046     //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1047     //! - dynamic=true but no place holder used?
1048     //! - is not . with lookAhead="1"
1049     //! - is not ^... without column or firstNonSpace attribute
1050     //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect
1051     //! - has no unused captures
1052     //! - has no unnecessary quantifier with lookAhead
1053     bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1054     {
1055         if (rule.type == Context::Rule::Type::RegExpr) {
1056             const QRegularExpression regexp(rule.string);
1057             if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1058                 return false;
1059             }
1060 
1061             // dynamic == true and no place holder?
1062             if (rule.dynamic == XmlBool::True) {
1063                 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1064                 if (!rule.string.contains(placeHolder)) {
1065                     qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1066                     return false;
1067                 }
1068             }
1069 
1070             auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1071             if (rule.lookAhead == XmlBool::True) {
1072                 static const QRegularExpression removeAllSuffix(QStringLiteral(
1073                     R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1074                 reg.replace(removeAllSuffix, QString());
1075             }
1076 
1077             reg.replace(QStringLiteral("{1}"), QString());
1078 
1079             // is DetectSpaces
1080             // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1081             static const QRegularExpression isDetectSpaces(
1082                 QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1083             if (rule.string.contains(isDetectSpaces)) {
1084                 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1085                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1086                            << rule.string;
1087                 return false;
1088             }
1089 
1090 #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1091 #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1092 
1093             // is RangeDetect
1094             static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1095                                                                    "\\.\\*[?*]?" REG_CHAR "|"
1096                                                                    "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?*]?\\1"
1097                                                                    ")$"));
1098             if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?"))
1099                  || rule.string.contains(QStringLiteral("[^")))
1100                 && reg.contains(isRange)) {
1101                 qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1102                 return false;
1103             }
1104 
1105             // is LineContinue
1106             static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1107             if (reg.contains(isLineContinue)) {
1108                 auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1109                 qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1110                 return false;
1111             }
1112 
1113             // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1114             static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1115             reg.replace(sanitize1, QStringLiteral("_"));
1116 
1117 #undef REG_CHAR
1118 #undef REG_ESCAPE_CHAR
1119 
1120             // use minimal or lazy operator
1121             static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1122             static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1123 
1124             if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1125                 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1126                 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) {
1127                 qWarning() << filename << "line" << rule.line
1128                            << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1129                 return false;
1130             }
1131 
1132             // replace [:...:] with ___
1133             static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1134             reg.replace(sanitize2, QStringLiteral("___"));
1135 
1136             // replace [ccc...], [special] with ...
1137             static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1138             reg.replace(sanitize3, QStringLiteral("...\\1"));
1139 
1140             // replace [c] with _
1141             static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1142             reg.replace(sanitize4, QStringLiteral("_"));
1143 
1144             const int len = reg.size();
1145             // replace [cC] with _
1146             static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1147             reg = reg.toUpper();
1148             reg.replace(toInsensitive, QString());
1149 
1150             // is StringDetect
1151             // ignore (?:, ) and {n}
1152             static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)"));
1153             if (reg.contains(isStringDetect)) {
1154                 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1155                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1156                            << ":" << rule.string;
1157                 if (len != reg.size()) {
1158                     qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1159                 }
1160                 return false;
1161             }
1162 
1163             // column="0" or firstNonSpace="1"
1164             if (rule.column == -1 && rule.firstNonSpace != XmlBool::True) {
1165                 // ^ without |
1166                 // (^sas*) -> ok
1167                 // (^sa|s*) -> ko
1168                 // (^(sa|s*)) -> ok
1169                 auto first = std::as_const(reg).begin();
1170                 auto last = std::as_const(reg).end();
1171                 int depth = 0;
1172 
1173                 while (QLatin1Char('(') == *first) {
1174                     ++depth;
1175                     ++first;
1176                     if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) {
1177                         first += 2;
1178                     }
1179                 }
1180 
1181                 if (QLatin1Char('^') == *first) {
1182                     const int bolDepth = depth;
1183                     bool replace = true;
1184 
1185                     while (++first != last) {
1186                         if (QLatin1Char('(') == *first) {
1187                             ++depth;
1188                         } else if (QLatin1Char(')') == *first) {
1189                             --depth;
1190                             if (depth < bolDepth) {
1191                                 // (^a)? === (^a|) -> ko
1192                                 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) {
1193                                     replace = false;
1194                                     break;
1195                                 }
1196                             }
1197                         } else if (QLatin1Char('|') == *first) {
1198                             // ignore '|' within subgroup
1199                             if (depth <= bolDepth) {
1200                                 replace = false;
1201                                 break;
1202                             }
1203                         }
1204                     }
1205 
1206                     if (replace) {
1207                         qWarning() << rule.filename << "line" << rule.line << "column=\"0\" or firstNonSpace=\"1\" missing with RegExpr:" << rule.string;
1208                         return false;
1209                     }
1210                 }
1211             }
1212 
1213             // add ^ with column=0
1214             if (rule.column == 0 && !rule.isDotRegex) {
1215                 bool hasStartOfLine = false;
1216                 auto first = std::as_const(reg).begin();
1217                 auto last = std::as_const(reg).end();
1218                 for (; first != last; ++first) {
1219                     if (*first == QLatin1Char('^')) {
1220                         hasStartOfLine = true;
1221                         break;
1222                     } else if (*first == QLatin1Char('(')) {
1223                         if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) {
1224                             first += 2;
1225                         }
1226                     } else {
1227                         break;
1228                     }
1229                 }
1230 
1231                 if (!hasStartOfLine) {
1232                     qWarning() << rule.filename << "line" << rule.line
1233                                << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1234                     return false;
1235                 }
1236             }
1237 
1238             bool useCapture = false;
1239 
1240             // detection of unnecessary capture
1241             if (regexp.captureCount()) {
1242                 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) {
1243                     int maxCapture = 9;
1244                     while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1245                         --maxCapture;
1246                     }
1247                     return maxCapture;
1248                 };
1249 
1250                 int maxCaptureUsed = 0;
1251                 // maximal dynamic reference
1252                 if (rule.context.context && !rule.context.stay) {
1253                     for (const auto &nextRule : rule.context.context->rules) {
1254                         if (nextRule.dynamic == XmlBool::True) {
1255                             static const QString cap[]{
1256                                 QStringLiteral("%1"),
1257                                 QStringLiteral("%2"),
1258                                 QStringLiteral("%3"),
1259                                 QStringLiteral("%4"),
1260                                 QStringLiteral("%5"),
1261                                 QStringLiteral("%6"),
1262                                 QStringLiteral("%7"),
1263                                 QStringLiteral("%8"),
1264                                 QStringLiteral("%9"),
1265                             };
1266                             int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1267                             maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1268                         }
1269                     }
1270                 }
1271 
1272                 static const QString num1[]{
1273                     QStringLiteral("\\1"),
1274                     QStringLiteral("\\2"),
1275                     QStringLiteral("\\3"),
1276                     QStringLiteral("\\4"),
1277                     QStringLiteral("\\5"),
1278                     QStringLiteral("\\6"),
1279                     QStringLiteral("\\7"),
1280                     QStringLiteral("\\8"),
1281                     QStringLiteral("\\9"),
1282                 };
1283                 static const QString num2[]{
1284                     QStringLiteral("\\g1"),
1285                     QStringLiteral("\\g2"),
1286                     QStringLiteral("\\g3"),
1287                     QStringLiteral("\\g4"),
1288                     QStringLiteral("\\g5"),
1289                     QStringLiteral("\\g6"),
1290                     QStringLiteral("\\g7"),
1291                     QStringLiteral("\\g8"),
1292                     QStringLiteral("\\g9"),
1293                 };
1294                 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string));
1295 
1296                 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1297 
1298                 if (maxCapture && regexp.captureCount() > maxCapture) {
1299                     qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1300                                << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1301                     return false;
1302                 }
1303 
1304                 useCapture = maxCapture;
1305             }
1306 
1307             if (!useCapture) {
1308                 // is DetectIdentifier
1309                 static const QRegularExpression isInsensitiveDetectIdentifier(
1310                     QStringLiteral(R"(^(\((\?:)?)?\[((a-z|_){2}|(A-Z|_){2})\]([+][*?]?)?\[((0-9|a-z|_){3}|(0-9|A-Z|_){3})\][*][*?]?(\))?$)"));
1311                 static const QRegularExpression isSensitiveDetectIdentifier(
1312                     QStringLiteral(R"(^(\((\?:)?)?\[(a-z|A-Z|_){3}\]([+][*?]?)?\[(0-9|a-z|A-Z|_){4}\][*][*?]?(\))?$)"));
1313                 auto &isDetectIdentifier = (rule.insensitive == XmlBool::True) ? isInsensitiveDetectIdentifier : isSensitiveDetectIdentifier;
1314                 if (rule.string.contains(isDetectIdentifier)) {
1315                     qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1316                     return false;
1317                 }
1318             }
1319 
1320             if (rule.isDotRegex) {
1321                 // search next rule with same column or firstNonSpace
1322                 int i = &rule - context.rules.data() + 1;
1323                 const bool hasColumn = (rule.column != -1);
1324                 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1325                 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1326                 for (; i < context.rules.size(); ++i) {
1327                     auto &rule2 = context.rules[i];
1328                     if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1329                         i = context.rules.size();
1330                         break;
1331                     }
1332 
1333                     const bool hasColumn2 = (rule2.column != -1);
1334                     const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1335                     if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1336                         || (hasFirstNonSpace && hasFirstNonSpace2)) {
1337                         break;
1338                     }
1339                 }
1340 
1341                 auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename;
1342                 if (i == context.rules.size()) {
1343                     if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1344                         && rule.endRegion.isEmpty() && !useCapture) {
1345                         qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1346                                    << "should be replaced by fallthroughContext:" << rule.string;
1347                     }
1348                 } else {
1349                     auto &nextRule = context.rules[i];
1350                     auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename;
1351                     qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1352                                << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1353                 }
1354 
1355                 // unnecessary quantifier
1356                 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1357                 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1358                 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1359                 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1360                     qWarning() << filename << "line" << rule.line
1361                                << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1362                     return false;
1363                 }
1364             }
1365         }
1366 
1367         return true;
1368     }
1369 
1370     // Parse and check <emptyLine>
1371     bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml)
1372     {
1373         bool success = true;
1374 
1375         QString pattern;
1376         XmlBool casesensitive{};
1377 
1378         for (auto &attr : xml.attributes()) {
1379             Parser parser{filename, xml, attr, success};
1380 
1381             const bool isExtracted =
1382                 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive"));
1383 
1384             success = parser.checkIfExtracted(isExtracted);
1385         }
1386 
1387         if (pattern.isEmpty()) {
1388             qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1389             success = false;
1390         } else {
1391             success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1392         }
1393 
1394         return success;
1395     }
1396 
1397     //! Check that a regular expression:
1398     //! - isValid()
1399     //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1400     bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1401     {
1402         const auto pattern = regexp.pattern();
1403 
1404         // validate regexp
1405         if (!regexp.isValid()) {
1406             qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1407                        << regexp.patternErrorOffset();
1408             return false;
1409         }
1410 
1411         // catch possible case typos: [A-z] or [a-Z]
1412         const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z")));
1413         if (azOffset >= 0) {
1414             qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1415             return false;
1416         }
1417 
1418         return true;
1419     }
1420 
1421     //! Search for rules with lookAhead="true" and context="#stay".
1422     //! This would cause an infinite loop.
1423     bool checkfallthrough(const Definition &definition, const Context &context) const
1424     {
1425         bool success = true;
1426 
1427         if (!context.fallthroughContext.name.isEmpty()) {
1428             if (context.fallthroughContext.stay) {
1429                 qWarning() << definition.filename << "line" << context.line << "possible infinite loop due to fallthroughContext=\"#stay\" in context "
1430                            << context.name;
1431                 success = false;
1432             }
1433 
1434             const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62};
1435             if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1436                 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1437                            << context.name;
1438                 success = false;
1439             } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1440                 qWarning() << definition.filename << "line" << context.line
1441                            << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1442                            << context.name;
1443                 success = false;
1444             }
1445         }
1446 
1447         return success;
1448     }
1449 
1450     //! Search for additionalDeliminator/weakDeliminator which has no effect.
1451     bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1452     {
1453         if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1454             return true;
1455         }
1456 
1457         bool success = true;
1458 
1459         if (definition.kateVersion < Version{5, 79}) {
1460             qWarning() << definition.filename << "line" << rule.line
1461                        << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1462             success = false;
1463         }
1464 
1465         for (QChar c : rule.additionalDeliminator) {
1466             if (!definition.wordDelimiters.contains(c)) {
1467                 return success;
1468             }
1469         }
1470 
1471         for (QChar c : rule.weakDeliminator) {
1472             if (definition.wordDelimiters.contains(c)) {
1473                 return success;
1474             }
1475         }
1476 
1477         qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1478         return false;
1479     }
1480 
1481     //! Search for rules with lookAhead="true" and context="#stay".
1482     //! This would cause an infinite loop.
1483     bool checkKeyword(const Definition &definition, const Context::Rule &rule, QSet<const Keywords *> &referencedKeywords) const
1484     {
1485         if (rule.type == Context::Rule::Type::keyword) {
1486             auto it = definition.keywordsList.find(rule.string);
1487             if (it != definition.keywordsList.end()) {
1488                 referencedKeywords.insert(&*it);
1489             } else {
1490                 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1491                 return false;
1492             }
1493         }
1494         return true;
1495     }
1496 
1497     //! Search for rules with lookAhead="true" and context="#stay".
1498     //! This would cause an infinite loop.
1499     bool checkLookAhead(const Context::Rule &rule) const
1500     {
1501         if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1502             qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1503         }
1504         return true;
1505     }
1506 
1507     //! Check that StringDetect contains more that 2 characters
1508     //! Fix with following command:
1509     //! \code
1510     //!   sed -E
1511     //!   '/StringDetect/{/dynamic="(1|true)|insensitive="(1|true)/!{s/StringDetect(.*)String="(.|&lt;|&gt;|&quot;|&amp;)(.|&lt;|&gt;|&quot;|&amp;)"/Detect2Chars\1char="\2"
1512     //!   char1="\3"/;t;s/StringDetect(.*)String="(.|&lt;|&gt;|&quot;|&amp;)"/DetectChar\1char="\2"/}}' -i file.xml...
1513     //! \endcode
1514     bool checkStringDetect(const Context::Rule &rule) const
1515     {
1516         if (rule.type == Context::Rule::Type::StringDetect) {
1517             // dynamic == true and no place holder?
1518             if (rule.dynamic == XmlBool::True) {
1519                 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1520                 if (!rule.string.contains(placeHolder)) {
1521                     qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1522                     return false;
1523                 }
1524             }
1525         }
1526         return true;
1527     }
1528 
1529     //! Check \<include> and delimiter in a keyword list
1530     bool checkKeywordsList(const Definition &definition, QSet<const Keywords *> &referencedKeywords) const
1531     {
1532         bool success = true;
1533 
1534         bool includeNotSupport = (definition.kateVersion < Version{5, 53});
1535         QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1536         while (keywordsIt.hasNext()) {
1537             keywordsIt.next();
1538 
1539             for (const auto &include : keywordsIt.value().items.includes) {
1540                 if (includeNotSupport) {
1541                     qWarning() << definition.filename << "line" << include.line
1542                                << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1543                     success = false;
1544                 }
1545                 success = checkKeywordInclude(definition, include, referencedKeywords) && success;
1546             }
1547 
1548             // Check that keyword list items do not have deliminator character
1549 #if 0
1550             for (const auto& keyword : keywordsIt.value().items.keywords) {
1551                 for (QChar c : keyword.content) {
1552                     if (definition.wordDelimiters.contains(c)) {
1553                         qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1554                         success = false;
1555                     }
1556                 }
1557             }
1558 #endif
1559         }
1560 
1561         return success;
1562     }
1563 
1564     //! Search for non-existing keyword include.
1565     bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include, QSet<const Keywords *> &referencedKeywords) const
1566     {
1567         bool containsKeywordName = true;
1568         int const idx = include.content.indexOf(QStringLiteral("##"));
1569         if (idx == -1) {
1570             auto it = definition.keywordsList.find(include.content);
1571             containsKeywordName = (it != definition.keywordsList.end());
1572             if (containsKeywordName) {
1573                 referencedKeywords.insert(&*it);
1574             }
1575         } else {
1576             auto defName = include.content.mid(idx + 2);
1577             auto listName = include.content.left(idx);
1578             auto it = m_definitions.find(defName);
1579             if (it == m_definitions.end()) {
1580                 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1581                 return false;
1582             }
1583             containsKeywordName = it->keywordsList.contains(listName);
1584         }
1585 
1586         if (!containsKeywordName) {
1587             qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1588         }
1589 
1590         return containsKeywordName;
1591     }
1592 
1593     //! Check if a rule is hidden by another
1594     //! - rule hidden by DetectChar or AnyChar
1595     //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1596     //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1597     //! - duplicate rule (Int, Float, keyword with same String, etc)
1598     //! - Rule hidden by a dot regex
1599     bool checkUreachableRules(const QString &filename,
1600                               const Context &context,
1601                               QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1602     {
1603         if (context.isOnlyIncluded) {
1604             return true;
1605         }
1606 
1607         struct Rule4 {
1608             RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1609             {
1610                 auto set = [&](RuleAndInclude &ruleAndInclude) {
1611                     auto old = ruleAndInclude;
1612                     ruleAndInclude = {&rule, includeRules};
1613                     return old;
1614                 };
1615 
1616                 if (rule.firstNonSpace == XmlBool::True) {
1617                     return set(firstNonSpace);
1618                 } else if (rule.column == 0) {
1619                     return set(column0);
1620                 } else if (rule.column > 0) {
1621                     return set(columnGreaterThan0[rule.column]);
1622                 } else {
1623                     return set(normal);
1624                 }
1625             }
1626 
1627         private:
1628             RuleAndInclude normal;
1629             RuleAndInclude column0;
1630             QMap<int, RuleAndInclude> columnGreaterThan0;
1631             RuleAndInclude firstNonSpace;
1632         };
1633 
1634         // Associate QChar with RuleAndInclude
1635         struct CharTable {
1636             /// Search RuleAndInclude associated with @p c.
1637             RuleAndInclude find(QChar c) const
1638             {
1639                 if (c.unicode() < 128) {
1640                     return m_asciiMap[c.unicode()];
1641                 }
1642                 auto it = m_utf8Map.find(c);
1643                 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1644             }
1645 
1646             /// Search RuleAndInclude associated with the characters of @p s.
1647             /// \return an empty QVector when at least one character is not found.
1648             QVector<RuleAndInclude> find(QStringView s) const
1649             {
1650                 QVector<RuleAndInclude> result;
1651 
1652                 for (QChar c : s) {
1653                     if (!find(c)) {
1654                         return result;
1655                     }
1656                 }
1657 
1658                 for (QChar c : s) {
1659                     result.append(find(c));
1660                 }
1661 
1662                 return result;
1663             }
1664 
1665             /// Associates @p c with a rule.
1666             void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1667             {
1668                 if (c.unicode() < 128) {
1669                     m_asciiMap[c.unicode()] = {&rule, includeRule};
1670                 } else {
1671                     m_utf8Map[c] = {&rule, includeRule};
1672                 }
1673             }
1674 
1675             /// Associates each character of @p s with a rule.
1676             void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1677             {
1678                 for (QChar c : s) {
1679                     append(c, rule, includeRule);
1680                 }
1681             }
1682 
1683         private:
1684             RuleAndInclude m_asciiMap[127]{};
1685             QMap<QChar, RuleAndInclude> m_utf8Map;
1686         };
1687 
1688         struct Char4Tables {
1689             CharTable chars;
1690             CharTable charsColumn0;
1691             QMap<int, CharTable> charsColumnGreaterThan0;
1692             CharTable charsFirstNonSpace;
1693         };
1694 
1695         // View on Char4Tables members
1696         struct CharTableArray {
1697             // Append Char4Tables members that satisfies firstNonSpace and column.
1698             // Char4Tables::char is always added.
1699             CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1700             {
1701                 if (rule.firstNonSpace == XmlBool::True) {
1702                     appendTable(tables.charsFirstNonSpace);
1703                 }
1704 
1705                 if (rule.column == 0) {
1706                     appendTable(tables.charsColumn0);
1707                 } else if (rule.column > 0) {
1708                     appendTable(tables.charsColumnGreaterThan0[rule.column]);
1709                 }
1710 
1711                 appendTable(tables.chars);
1712             }
1713 
1714             // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1715             void removeNonSpecialWhenSpecial()
1716             {
1717                 if (m_size > 1) {
1718                     --m_size;
1719                 }
1720             }
1721 
1722             /// Search RuleAndInclude associated with @p c.
1723             RuleAndInclude find(QChar c) const
1724             {
1725                 for (int i = 0; i < m_size; ++i) {
1726                     if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1727                         return ruleAndInclude;
1728                     }
1729                 }
1730                 return RuleAndInclude{nullptr, nullptr};
1731             }
1732 
1733             /// Search RuleAndInclude associated with the characters of @p s.
1734             /// \return an empty QVector when at least one character is not found.
1735             QVector<RuleAndInclude> find(QStringView s) const
1736             {
1737                 for (int i = 0; i < m_size; ++i) {
1738                     auto result = m_charTables[i]->find(s);
1739                     if (result.size()) {
1740                         while (++i < m_size) {
1741                             result.append(m_charTables[i]->find(s));
1742                         }
1743                         return result;
1744                     }
1745                 }
1746                 return QVector<RuleAndInclude>();
1747             }
1748 
1749             /// Associates @p c with a rule.
1750             void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1751             {
1752                 for (int i = 0; i < m_size; ++i) {
1753                     m_charTables[i]->append(c, rule, includeRule);
1754                 }
1755             }
1756 
1757             /// Associates each character of @p s with a rule.
1758             void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1759             {
1760                 for (int i = 0; i < m_size; ++i) {
1761                     m_charTables[i]->append(s, rule, includeRule);
1762                 }
1763             }
1764 
1765         private:
1766             void appendTable(CharTable &t)
1767             {
1768                 m_charTables[m_size] = &t;
1769                 ++m_size;
1770             }
1771 
1772             CharTable *m_charTables[3];
1773             int m_size = 0;
1774         };
1775 
1776         struct ObservableRule {
1777             const Context::Rule *rule;
1778             const Context::Rule *includeRules;
1779 
1780             bool hasResolvedIncludeRules() const
1781             {
1782                 return rule == includeRules;
1783             }
1784         };
1785 
1786         // Iterates over all the rules, including those in includedRules
1787         struct RuleIterator {
1788             RuleIterator(const QVector<ObservableRule> &rules, const ObservableRule &endRule)
1789                 : m_end(&endRule - rules.data())
1790                 , m_rules(rules)
1791             {
1792             }
1793 
1794             /// \return next rule or nullptr
1795             const Context::Rule *next()
1796             {
1797                 // if in includedRules
1798                 if (m_includedRules) {
1799                     ++m_i2;
1800                     if (m_i2 != m_includedRules->size()) {
1801                         return (*m_includedRules)[m_i2];
1802                     }
1803                     ++m_i;
1804                     m_includedRules = nullptr;
1805                 }
1806 
1807                 // if is a includedRules
1808                 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
1809                     if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
1810                         m_i2 = 0;
1811                         m_includedRules = &m_rules[m_i].rule->includedRules;
1812                         return (*m_includedRules)[m_i2];
1813                     }
1814                     ++m_i;
1815                 }
1816 
1817                 if (m_i < m_end) {
1818                     ++m_i;
1819                     return m_rules[m_i - 1].rule;
1820                 }
1821 
1822                 return nullptr;
1823             }
1824 
1825             /// \return current IncludeRules or nullptr
1826             const Context::Rule *currentIncludeRules() const
1827             {
1828                 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
1829             }
1830 
1831         private:
1832             int m_i = 0;
1833             int m_i2;
1834             int m_end;
1835             const QVector<ObservableRule> &m_rules;
1836             const QVector<const Context::Rule *> *m_includedRules = nullptr;
1837         };
1838 
1839         // Dot regex container that satisfies firstNonSpace and column.
1840         struct DotRegex {
1841             /// Append a dot regex rule.
1842             void append(const Context::Rule &rule, const Context::Rule *includedRule)
1843             {
1844                 auto array = extractDotRegexes(rule);
1845                 if (array[0]) {
1846                     *array[0] = {&rule, includedRule};
1847                 }
1848                 if (array[1]) {
1849                     *array[1] = {&rule, includedRule};
1850                 }
1851             }
1852 
1853             /// Search dot regex which hides @p rule
1854             RuleAndInclude find(const Context::Rule &rule)
1855             {
1856                 auto array = extractDotRegexes(rule);
1857                 if (array[0]) {
1858                     return *array[0];
1859                 }
1860                 if (array[1]) {
1861                     return *array[1];
1862                 }
1863                 return RuleAndInclude{};
1864             }
1865 
1866         private:
1867             using Array = std::array<RuleAndInclude *, 2>;
1868 
1869             Array extractDotRegexes(const Context::Rule &rule)
1870             {
1871                 Array ret{};
1872 
1873                 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
1874                     ret[0] = &dotRegex;
1875                 } else {
1876                     if (rule.firstNonSpace == XmlBool::True) {
1877                         ret[0] = &dotRegexFirstNonSpace;
1878                     }
1879 
1880                     if (rule.column == 0) {
1881                         ret[1] = &dotRegexColumn0;
1882                     } else if (rule.column > 0) {
1883                         ret[1] = &dotRegexColumnGreaterThan0[rule.column];
1884                     }
1885                 }
1886 
1887                 return ret;
1888             }
1889 
1890             RuleAndInclude dotRegex{};
1891             RuleAndInclude dotRegexColumn0{};
1892             QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
1893             RuleAndInclude dotRegexFirstNonSpace{};
1894         };
1895 
1896         bool success = true;
1897 
1898         // characters of DetectChar/AnyChar
1899         Char4Tables detectChars;
1900         // characters of dynamic DetectChar
1901         Char4Tables dynamicDetectChars;
1902         // characters of LineContinue
1903         Char4Tables lineContinueChars;
1904 
1905         Rule4 intRule{};
1906         Rule4 floatRule{};
1907         Rule4 hlCCharRule{};
1908         Rule4 hlCOctRule{};
1909         Rule4 hlCHexRule{};
1910         Rule4 hlCStringCharRule{};
1911         Rule4 detectIdentifierRule{};
1912 
1913         // Contains includedRules and included includedRules
1914         QMap<Context const *, RuleAndInclude> includeContexts;
1915 
1916         DotRegex dotRegex;
1917 
1918         QVector<ObservableRule> observedRules;
1919         observedRules.reserve(context.rules.size());
1920         for (const Context::Rule &rule : context.rules) {
1921             const Context::Rule *includeRule = nullptr;
1922             if (rule.type == Context::Rule::Type::IncludeRules) {
1923                 auto *context = rule.context.context;
1924                 if (context && context->isOnlyIncluded) {
1925                     includeRule = &rule;
1926                 }
1927             }
1928 
1929             observedRules.push_back({&rule, includeRule});
1930             if (includeRule) {
1931                 for (const Context::Rule *rule2 : rule.includedRules) {
1932                     observedRules.push_back({rule2, includeRule});
1933                 }
1934             }
1935         }
1936 
1937         for (auto &observedRule : observedRules) {
1938             const Context::Rule &rule = *observedRule.rule;
1939             bool isUnreachable = false;
1940             QVector<RuleAndInclude> unreachableBy;
1941 
1942             // declare rule as unreachable if ruleAndInclude is not empty
1943             auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
1944                 if (ruleAndInclude) {
1945                     isUnreachable = true;
1946                     unreachableBy.append(ruleAndInclude);
1947                 }
1948             };
1949 
1950             // declare rule as unreachable if ruleAndIncludes is not empty
1951             auto updateUnreachable2 = [&](const QVector<RuleAndInclude> &ruleAndIncludes) {
1952                 if (!ruleAndIncludes.isEmpty()) {
1953                     isUnreachable = true;
1954                     unreachableBy.append(ruleAndIncludes);
1955                 }
1956             };
1957 
1958             // check if rule2.firstNonSpace/column is compatible with those of rule
1959             auto isCompatible = [&rule](Context::Rule const &rule2) {
1960                 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
1961                     || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
1962             };
1963 
1964             updateUnreachable1(dotRegex.find(rule));
1965 
1966             switch (rule.type) {
1967             // checks if hidden by DetectChar/AnyChar
1968             // then add the characters to detectChars
1969             case Context::Rule::Type::AnyChar: {
1970                 auto tables = CharTableArray(detectChars, rule);
1971                 updateUnreachable2(tables.find(rule.string));
1972                 tables.removeNonSpecialWhenSpecial();
1973                 tables.append(rule.string, rule);
1974                 break;
1975             }
1976 
1977             // check if is hidden by DetectChar/AnyChar
1978             // then add the characters to detectChars or dynamicDetectChars
1979             case Context::Rule::Type::DetectChar: {
1980                 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
1981                 auto tables = CharTableArray(chars4, rule);
1982                 updateUnreachable1(tables.find(rule.char0));
1983                 tables.removeNonSpecialWhenSpecial();
1984                 tables.append(rule.char0, rule);
1985                 break;
1986             }
1987 
1988             // check if hidden by DetectChar/AnyChar
1989             // then add spaces characters to detectChars
1990             case Context::Rule::Type::DetectSpaces: {
1991                 auto tables = CharTableArray(detectChars, rule);
1992                 updateUnreachable2(tables.find(QStringLiteral(" \t")));
1993                 tables.removeNonSpecialWhenSpecial();
1994                 tables.append(QLatin1Char(' '), rule);
1995                 tables.append(QLatin1Char('\t'), rule);
1996                 break;
1997             }
1998 
1999             // check if hidden by DetectChar/AnyChar
2000             case Context::Rule::Type::HlCChar:
2001                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\'')));
2002                 updateUnreachable1(hlCCharRule.setRule(rule));
2003                 break;
2004 
2005             // check if hidden by DetectChar/AnyChar
2006             case Context::Rule::Type::HlCHex:
2007                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2008                 updateUnreachable1(hlCHexRule.setRule(rule));
2009                 break;
2010 
2011             // check if hidden by DetectChar/AnyChar
2012             case Context::Rule::Type::HlCOct:
2013                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2014                 updateUnreachable1(hlCOctRule.setRule(rule));
2015                 break;
2016 
2017             // check if hidden by DetectChar/AnyChar
2018             case Context::Rule::Type::HlCStringChar:
2019                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\')));
2020                 updateUnreachable1(hlCStringCharRule.setRule(rule));
2021                 break;
2022 
2023             // check if hidden by DetectChar/AnyChar
2024             case Context::Rule::Type::Int:
2025                 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789")));
2026                 updateUnreachable1(intRule.setRule(rule));
2027                 break;
2028 
2029             // check if hidden by DetectChar/AnyChar
2030             case Context::Rule::Type::Float:
2031                 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789.")));
2032                 updateUnreachable1(floatRule.setRule(rule));
2033                 break;
2034 
2035             // check if hidden by another DetectIdentifier rule
2036             case Context::Rule::Type::DetectIdentifier:
2037                 updateUnreachable1(detectIdentifierRule.setRule(rule));
2038                 break;
2039 
2040             // check if hidden by DetectChar/AnyChar or another LineContinue
2041             case Context::Rule::Type::LineContinue: {
2042                 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2043 
2044                 auto tables = CharTableArray(lineContinueChars, rule);
2045                 updateUnreachable1(tables.find(rule.char0));
2046                 tables.removeNonSpecialWhenSpecial();
2047                 tables.append(rule.char0, rule);
2048                 break;
2049             }
2050 
2051             // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2052             case Context::Rule::Type::Detect2Chars:
2053             case Context::Rule::Type::RangeDetect:
2054                 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2055                 if (!isUnreachable) {
2056                     RuleIterator ruleIterator(observedRules, observedRule);
2057                     while (const auto *rulePtr = ruleIterator.next()) {
2058                         if (isUnreachable) {
2059                             break;
2060                         }
2061                         const auto &rule2 = *rulePtr;
2062                         if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2063                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2064                         }
2065                     }
2066                 }
2067                 break;
2068 
2069             case Context::Rule::Type::RegExpr: {
2070                 if (rule.isDotRegex) {
2071                     dotRegex.append(rule, nullptr);
2072                     break;
2073                 }
2074 
2075                 // check that `rule` does not have another RegExpr as a prefix
2076                 RuleIterator ruleIterator(observedRules, observedRule);
2077                 while (const auto *rulePtr = ruleIterator.next()) {
2078                     if (isUnreachable) {
2079                         break;
2080                     }
2081                     const auto &rule2 = *rulePtr;
2082                     if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2083                         && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2084                         bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2085                         if (!add) {
2086                             // \s.* (sanitized = \s) is considered hiding \s*\S
2087                             // we check the quantifiers to see if this is the case
2088                             auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2089                             auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2090                             auto c3 = rule2.sanitizedString.back().unicode();
2091                             if (c3 == '*' || c3 == '?' || c3 == '+') {
2092                                 add = true;
2093                             } else if (c1 == '*' || c1 == '?') {
2094                                 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2095                             } else {
2096                                 add = true;
2097                             }
2098                         }
2099                         if (add) {
2100                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2101                         }
2102                     }
2103                 }
2104 
2105                 Q_FALLTHROUGH();
2106             }
2107             // check if a rule does not have another rule as a prefix
2108             case Context::Rule::Type::WordDetect:
2109             case Context::Rule::Type::StringDetect: {
2110                 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2111                 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2112                     RuleIterator ruleIterator(observedRules, observedRule);
2113                     while (const auto *rulePtr = ruleIterator.next()) {
2114                         if (isUnreachable) {
2115                             break;
2116                         }
2117 
2118                         const auto &rule2 = *rulePtr;
2119                         if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2120                             continue;
2121                         }
2122 
2123                         const bool isSensitive = (rule2.insensitive == XmlBool::True);
2124                         const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2125                         if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2126                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2127                         }
2128                     }
2129                 }
2130 
2131                 // string used for comparison and truncated from "dynamic" part
2132                 QStringView s = rule.string;
2133 
2134                 // truncate to '%' with dynamic rules
2135                 if (rule.dynamic == XmlBool::True) {
2136                     static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2137                     auto result = dynamicPosition.match(rule.string);
2138                     s = s.left(result.capturedLength());
2139                 }
2140 
2141                 QString sanitizedRegex;
2142                 // truncate to special character with RegExpr.
2143                 // If regexp contains '|', `s` becomes empty.
2144                 if (rule.type == Context::Rule::Type::RegExpr) {
2145                     static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2146                     static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2147                     const qsizetype result = regularChars.match(rule.string).capturedLength();
2148                     const qsizetype pos = qMin(result, s.size());
2149                     if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) {
2150                         sanitizedRegex = rule.string.left(qMin(result, s.size()));
2151                         sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2152                         s = sanitizedRegex;
2153                     } else {
2154                         s = QStringView();
2155                     }
2156                 }
2157 
2158                 // check if hidden by DetectChar/AnyChar
2159                 if (s.size() > 0) {
2160                     auto t = CharTableArray(detectChars, rule);
2161                     if (rule.insensitive != XmlBool::True) {
2162                         updateUnreachable1(t.find(s[0]));
2163                     } else {
2164                         QChar c2[]{s[0].toLower(), s[0].toUpper()};
2165                         updateUnreachable2(t.find(QStringView(c2, 2)));
2166                     }
2167                 }
2168 
2169                 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2170                 if (s.size() > 0 && !isUnreachable) {
2171                     // combination of uppercase and lowercase
2172                     RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2173 
2174                     RuleIterator ruleIterator(observedRules, observedRule);
2175                     while (const auto *rulePtr = ruleIterator.next()) {
2176                         if (isUnreachable) {
2177                             break;
2178                         }
2179                         const auto &rule2 = *rulePtr;
2180                         const bool isSensitive = (rule2.insensitive == XmlBool::True);
2181                         const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2182 
2183                         switch (rule2.type) {
2184                         // check that it is not a detectChars prefix
2185                         case Context::Rule::Type::Detect2Chars:
2186                             if (isCompatible(rule2) && s.size() >= 2) {
2187                                 if (rule.insensitive != XmlBool::True) {
2188                                     if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2189                                         updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2190                                     }
2191                                 } else {
2192                                     // when the string is case insensitive,
2193                                     // all 4 upper/lower case combinations must be found
2194                                     auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2195                                         if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2196                                             x = {&rule2, ruleIterator.currentIncludeRules()};
2197                                         }
2198                                     };
2199                                     set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2200                                     set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2201                                     set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2202                                     set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2203 
2204                                     if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2205                                         && detect2CharsInsensitives[3]) {
2206                                         isUnreachable = true;
2207                                         unreachableBy.append(detect2CharsInsensitives[0]);
2208                                         unreachableBy.append(detect2CharsInsensitives[1]);
2209                                         unreachableBy.append(detect2CharsInsensitives[2]);
2210                                         unreachableBy.append(detect2CharsInsensitives[3]);
2211                                     }
2212                                 }
2213                             }
2214                             break;
2215 
2216                         // check that it is not a StringDetect prefix
2217                         case Context::Rule::Type::StringDetect:
2218                             if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2219                                 && s.startsWith(rule2.string, caseSensitivity)) {
2220                                 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2221                             }
2222                             break;
2223 
2224                         // check if a WordDetect is hidden by another WordDetect
2225                         case Context::Rule::Type::WordDetect:
2226                             if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2227                                 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2228                                 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2229                             }
2230                             break;
2231 
2232                         default:;
2233                         }
2234                     }
2235                 }
2236 
2237                 break;
2238             }
2239 
2240             // check if hidden by another keyword rule
2241             case Context::Rule::Type::keyword: {
2242                 RuleIterator ruleIterator(observedRules, observedRule);
2243                 while (const auto *rulePtr = ruleIterator.next()) {
2244                     if (isUnreachable) {
2245                         break;
2246                     }
2247                     const auto &rule2 = *rulePtr;
2248                     if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2249                         updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2250                     }
2251                 }
2252                 // TODO check that all keywords are hidden by another rules
2253                 break;
2254             }
2255 
2256             // add characters in those used but without checking if they are already.
2257             //  <DetectChar char="}" />
2258             //  <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2259             //  <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2260             //  <DetectChar char="{" /> <- hidden by previous rule
2261             case Context::Rule::Type::IncludeRules:
2262                 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2263                     break;
2264                 }
2265 
2266                 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2267                     updateUnreachable1(ruleAndInclude);
2268                 } else {
2269                     ruleAndInclude.rule = &rule;
2270                 }
2271 
2272                 for (const auto *rulePtr : rule.includedIncludeRules) {
2273                     includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2274                 }
2275 
2276                 if (observedRule.includeRules) {
2277                     break;
2278                 }
2279 
2280                 for (const auto *rulePtr : rule.includedRules) {
2281                     const auto &rule2 = *rulePtr;
2282                     switch (rule2.type) {
2283                     case Context::Rule::Type::AnyChar: {
2284                         auto tables = CharTableArray(detectChars, rule2);
2285                         tables.removeNonSpecialWhenSpecial();
2286                         tables.append(rule2.string, rule2, &rule);
2287                         break;
2288                     }
2289 
2290                     case Context::Rule::Type::DetectChar: {
2291                         auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2292                         auto tables = CharTableArray(chars4, rule2);
2293                         tables.removeNonSpecialWhenSpecial();
2294                         tables.append(rule2.char0, rule2, &rule);
2295                         break;
2296                     }
2297 
2298                     case Context::Rule::Type::DetectSpaces: {
2299                         auto tables = CharTableArray(detectChars, rule2);
2300                         tables.removeNonSpecialWhenSpecial();
2301                         tables.append(QLatin1Char(' '), rule2, &rule);
2302                         tables.append(QLatin1Char('\t'), rule2, &rule);
2303                         break;
2304                     }
2305 
2306                     case Context::Rule::Type::HlCChar:
2307                         hlCCharRule.setRule(rule2, &rule);
2308                         break;
2309 
2310                     case Context::Rule::Type::HlCHex:
2311                         hlCHexRule.setRule(rule2, &rule);
2312                         break;
2313 
2314                     case Context::Rule::Type::HlCOct:
2315                         hlCOctRule.setRule(rule2, &rule);
2316                         break;
2317 
2318                     case Context::Rule::Type::HlCStringChar:
2319                         hlCStringCharRule.setRule(rule2, &rule);
2320                         break;
2321 
2322                     case Context::Rule::Type::Int:
2323                         intRule.setRule(rule2, &rule);
2324                         break;
2325 
2326                     case Context::Rule::Type::Float:
2327                         floatRule.setRule(rule2, &rule);
2328                         break;
2329 
2330                     case Context::Rule::Type::LineContinue: {
2331                         auto tables = CharTableArray(lineContinueChars, rule2);
2332                         tables.removeNonSpecialWhenSpecial();
2333                         tables.append(rule2.char0, rule2, &rule);
2334                         break;
2335                     }
2336 
2337                     case Context::Rule::Type::RegExpr:
2338                         if (rule2.isDotRegex) {
2339                             dotRegex.append(rule2, &rule);
2340                         }
2341                         break;
2342 
2343                     case Context::Rule::Type::WordDetect:
2344                     case Context::Rule::Type::StringDetect:
2345                     case Context::Rule::Type::Detect2Chars:
2346                     case Context::Rule::Type::IncludeRules:
2347                     case Context::Rule::Type::DetectIdentifier:
2348                     case Context::Rule::Type::keyword:
2349                     case Context::Rule::Type::Unknown:
2350                     case Context::Rule::Type::RangeDetect:
2351                         break;
2352                     }
2353                 }
2354                 break;
2355 
2356             case Context::Rule::Type::Unknown:
2357                 break;
2358             }
2359 
2360             if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2361                 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2362                 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2363                     unreachableIncludedRule.unreachableBy.append(unreachableBy);
2364                 } else {
2365                     unreachableIncludedRule.alwaysUnreachable = false;
2366                 }
2367             } else if (isUnreachable) {
2368                 success = false;
2369                 QString message;
2370                 message.reserve(128);
2371                 for (auto &ruleAndInclude : unreachableBy) {
2372                     message += QStringLiteral("line ");
2373                     if (ruleAndInclude.includeRules) {
2374                         message += QString::number(ruleAndInclude.includeRules->line);
2375                         message += QStringLiteral(" [by '");
2376                         message += ruleAndInclude.includeRules->context.name;
2377                         message += QStringLiteral("' line ");
2378                         message += QString::number(ruleAndInclude.rule->line);
2379                         if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2380                             message += QStringLiteral(" (");
2381                             message += ruleAndInclude.rule->filename;
2382                             message += QLatin1Char(')');
2383                         }
2384                         message += QLatin1Char(']');
2385                     } else {
2386                         message += QString::number(ruleAndInclude.rule->line);
2387                     }
2388                     message += QStringLiteral(", ");
2389                 }
2390                 message.chop(2);
2391                 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2392             }
2393         }
2394 
2395         return success;
2396     }
2397 
2398     //! Proposes to merge certain rule sequences
2399     //! - several DetectChar/AnyChar into AnyChar
2400     //! - several RegExpr into one RegExpr
2401     bool suggestRuleMerger(const QString &filename, const Context &context) const
2402     {
2403         bool success = true;
2404 
2405         if (context.rules.isEmpty()) {
2406             return success;
2407         }
2408 
2409         auto it = context.rules.begin();
2410         const auto end = context.rules.end() - 1;
2411 
2412         for (; it < end; ++it) {
2413             auto &rule1 = *it;
2414             auto &rule2 = it[1];
2415 
2416             auto isCommonCompatible = [&] {
2417                 if (rule1.lookAhead != rule2.lookAhead) {
2418                     return false;
2419                 }
2420                 // ignore attribute when lookAhead is true
2421                 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2422                     return false;
2423                 }
2424                 // clang-format off
2425                 return rule1.beginRegion == rule2.beginRegion
2426                     && rule1.endRegion == rule2.endRegion
2427                     && rule1.firstNonSpace == rule2.firstNonSpace
2428                     && rule1.context.context == rule2.context.context
2429                     && rule1.context.popCount == rule2.context.popCount;
2430                 // clang-format on
2431             };
2432 
2433             switch (rule1.type) {
2434             // request to merge AnyChar/DetectChar
2435             case Context::Rule::Type::AnyChar:
2436             case Context::Rule::Type::DetectChar:
2437                 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible()
2438                     && rule1.column == rule2.column) {
2439                     qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2440                     success = false;
2441                 }
2442                 break;
2443 
2444             // request to merge multiple RegExpr
2445             case Context::Rule::Type::RegExpr:
2446                 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2447                     && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2448                     qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2449                     success = false;
2450                 }
2451                 break;
2452 
2453             case Context::Rule::Type::DetectSpaces:
2454             case Context::Rule::Type::HlCChar:
2455             case Context::Rule::Type::HlCHex:
2456             case Context::Rule::Type::HlCOct:
2457             case Context::Rule::Type::HlCStringChar:
2458             case Context::Rule::Type::Int:
2459             case Context::Rule::Type::Float:
2460             case Context::Rule::Type::LineContinue:
2461             case Context::Rule::Type::WordDetect:
2462             case Context::Rule::Type::StringDetect:
2463             case Context::Rule::Type::Detect2Chars:
2464             case Context::Rule::Type::IncludeRules:
2465             case Context::Rule::Type::DetectIdentifier:
2466             case Context::Rule::Type::keyword:
2467             case Context::Rule::Type::Unknown:
2468             case Context::Rule::Type::RangeDetect:
2469                 break;
2470             }
2471         }
2472 
2473         return success;
2474     }
2475 
2476     //! Initialize the referenced context (ContextName::context)
2477     //! Some input / output examples are:
2478     //! - "#stay"         -> ""
2479     //! - "#pop"          -> ""
2480     //! - "Comment"       -> "Comment"
2481     //! - "#pop!Comment"  -> "Comment"
2482     //! - "##ISO C++"     -> ""
2483     //! - "Comment##ISO C++"-> "Comment" in ISO C++
2484     void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2485     {
2486         QStringView name = contextName.name;
2487         if (name.isEmpty()) {
2488             contextName.stay = true;
2489         } else if (name.startsWith(QStringLiteral("#stay"))) {
2490             name = name.mid(5);
2491             contextName.stay = true;
2492             contextName.context = &context;
2493             if (!name.isEmpty()) {
2494                 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2495                 m_success = false;
2496             }
2497         } else {
2498             while (name.startsWith(QStringLiteral("#pop"))) {
2499                 name = name.mid(4);
2500                 ++contextName.popCount;
2501             }
2502 
2503             if (contextName.popCount && !name.isEmpty()) {
2504                 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) {
2505                     name = name.mid(1);
2506                 } else {
2507                     qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2508                     m_success = false;
2509                 }
2510             }
2511 
2512             if (!name.isEmpty()) {
2513                 const int idx = name.indexOf(QStringLiteral("##"));
2514                 if (idx == -1) {
2515                     auto it = definition.contexts.find(name.toString());
2516                     if (it != definition.contexts.end()) {
2517                         contextName.context = &*it;
2518                     }
2519                 } else {
2520                     auto defName = name.mid(idx + 2);
2521                     auto it = m_definitions.find(defName.toString());
2522                     if (it != m_definitions.end()) {
2523                         auto listName = name.left(idx).toString();
2524                         definition.referencedDefinitions.insert(&*it);
2525                         auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2526                         if (ctxIt != it->contexts.end()) {
2527                             contextName.context = &*ctxIt;
2528                         }
2529                     } else {
2530                         qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2531                         m_success = false;
2532                     }
2533                 }
2534 
2535                 if (!contextName.context) {
2536                     qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2537                     m_success = false;
2538                 }
2539             }
2540         }
2541     }
2542 
2543     QMap<QString, Definition> m_definitions;
2544     Definition *m_currentDefinition = nullptr;
2545     Keywords *m_currentKeywords = nullptr;
2546     Context *m_currentContext = nullptr;
2547     bool m_success = true;
2548 };
2549 
2550 namespace
2551 {
2552 QStringList readListing(const QString &fileName)
2553 {
2554     QFile file(fileName);
2555     if (!file.open(QIODevice::ReadOnly)) {
2556         return QStringList();
2557     }
2558 
2559     QXmlStreamReader xml(&file);
2560     QStringList listing;
2561     while (!xml.atEnd()) {
2562         xml.readNext();
2563 
2564         // add only .xml files, no .json or stuff
2565         if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
2566             listing.append(xml.text().toString());
2567         }
2568     }
2569 
2570     if (xml.hasError()) {
2571         qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset();
2572         listing.clear();
2573     }
2574 
2575     return listing;
2576 }
2577 
2578 /**
2579  * check if the "extensions" attribute have valid wildcards
2580  * @param extensions extensions string to check
2581  * @return valid?
2582  */
2583 bool checkExtensions(QStringView extensions)
2584 {
2585     // get list of extensions
2586     const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts);
2587 
2588     // ok if empty
2589     if (extensionParts.isEmpty()) {
2590         return true;
2591     }
2592 
2593     // check that only valid wildcard things are inside the parts
2594     for (const auto &extension : extensionParts) {
2595         for (const auto c : extension) {
2596             // eat normal things
2597             if (c.isDigit() || c.isLetter()) {
2598                 continue;
2599             }
2600 
2601             // allow some special characters
2602             if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) {
2603                 continue;
2604             }
2605 
2606             // only allowed wildcard things: '?' and '*'
2607             if (c == QLatin1Char('?') || c == QLatin1Char('*')) {
2608                 continue;
2609             }
2610 
2611             qWarning() << "invalid character" << c << "seen in extensions wildcard";
2612             return false;
2613         }
2614     }
2615 
2616     // all checks passed
2617     return true;
2618 }
2619 
2620 }
2621 
2622 int main(int argc, char *argv[])
2623 {
2624     // get app instance
2625     QCoreApplication app(argc, argv);
2626 
2627     // ensure enough arguments are passed
2628     if (app.arguments().size() < 3) {
2629         return 1;
2630     }
2631 
2632 #ifdef QT_XMLPATTERNS_LIB
2633     // open schema
2634     QXmlSchema schema;
2635     if (!schema.load(QUrl::fromLocalFile(app.arguments().at(2)))) {
2636         return 2;
2637     }
2638 #endif
2639 
2640     const QString hlFilenamesListing = app.arguments().value(3);
2641     if (hlFilenamesListing.isEmpty()) {
2642         return 1;
2643     }
2644 
2645     QStringList hlFilenames = readListing(hlFilenamesListing);
2646     if (hlFilenames.isEmpty()) {
2647         qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
2648         return 3;
2649     }
2650 
2651     // text attributes
2652     const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("section") << QStringLiteral("mimetype")
2653                                                      << QStringLiteral("extensions") << QStringLiteral("style") << QStringLiteral("author")
2654                                                      << QStringLiteral("license") << QStringLiteral("indenter");
2655 
2656     // index all given highlightings
2657     HlFilesChecker filesChecker;
2658     QVariantMap hls;
2659     int anyError = 0;
2660     for (const QString &hlFilename : std::as_const(hlFilenames)) {
2661         QFile hlFile(hlFilename);
2662         if (!hlFile.open(QIODevice::ReadOnly)) {
2663             qWarning("Failed to open %s", qPrintable(hlFilename));
2664             anyError = 3;
2665             continue;
2666         }
2667 
2668 #ifdef QT_XMLPATTERNS_LIB
2669         // validate against schema
2670         QXmlSchemaValidator validator(schema);
2671         if (!validator.validate(&hlFile, QUrl::fromLocalFile(hlFile.fileName()))) {
2672             anyError = 4;
2673             continue;
2674         }
2675 #endif
2676 
2677         // read the needed attributes from toplevel language tag
2678         hlFile.reset();
2679         QXmlStreamReader xml(&hlFile);
2680         if (xml.readNextStartElement()) {
2681             if (xml.name() != QLatin1String("language")) {
2682                 anyError = 5;
2683                 continue;
2684             }
2685         } else {
2686             anyError = 6;
2687             continue;
2688         }
2689 
2690         // map to store hl info
2691         QVariantMap hl;
2692 
2693         // transfer text attributes
2694         for (const QString &attribute : std::as_const(textAttributes)) {
2695             hl[attribute] = xml.attributes().value(attribute).toString();
2696         }
2697 
2698         // check if extensions have the right format
2699         if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
2700             qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
2701             anyError = 23;
2702         }
2703 
2704         // numerical attributes
2705         hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
2706         hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
2707 
2708         // add boolean one
2709         hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
2710 
2711         // remember hl
2712         hls[QFileInfo(hlFile).fileName()] = hl;
2713 
2714         const QString hlName = hl[QStringLiteral("name")].toString();
2715 
2716         filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName);
2717 
2718         // scan for broken regex or keywords with spaces
2719         while (!xml.atEnd()) {
2720             xml.readNext();
2721             filesChecker.processElement(xml);
2722         }
2723 
2724         if (xml.hasError()) {
2725             anyError = 33;
2726             qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
2727         }
2728     }
2729 
2730     filesChecker.resolveContexts();
2731 
2732     if (!filesChecker.check()) {
2733         anyError = 7;
2734     }
2735 
2736     // bail out if any problem was seen
2737     if (anyError) {
2738         return anyError;
2739     }
2740 
2741     // create outfile, after all has worked!
2742     QFile outFile(app.arguments().at(1));
2743     if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
2744         return 9;
2745     }
2746 
2747     // write out the collected hl info as CBOR
2748     outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
2749 
2750     // be done
2751     return 0;
2752 }