File indexing completed on 2024-05-12 04:02:16

0001 /*
0002     SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org>
0003     SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com>
0004 
0005     SPDX-License-Identifier: MIT
0006 */
0007 
0008 #include <QCborValue>
0009 #include <QCoreApplication>
0010 #include <QDebug>
0011 #include <QFile>
0012 #include <QFileInfo>
0013 #include <QMutableMapIterator>
0014 #include <QRegularExpression>
0015 #include <QScopeGuard>
0016 #include <QVariant>
0017 #include <QXmlStreamReader>
0018 
0019 #ifdef HAS_XERCESC
0020 
0021 #include <xercesc/framework/XMLGrammarPoolImpl.hpp>
0022 
0023 #include <xercesc/parsers/SAX2XMLReaderImpl.hpp>
0024 
0025 #include <xercesc/sax/ErrorHandler.hpp>
0026 #include <xercesc/sax/SAXParseException.hpp>
0027 
0028 #include <xercesc/util/PlatformUtils.hpp>
0029 #include <xercesc/util/XMLString.hpp>
0030 #include <xercesc/util/XMLUni.hpp>
0031 
0032 #include <xercesc/framework/XMLGrammarPoolImpl.hpp>
0033 #include <xercesc/validators/common/Grammar.hpp>
0034 
0035 using namespace xercesc;
0036 
0037 /*
0038  * Ideas taken from:
0039  *
0040  * author    : Boris Kolpackov <boris@codesynthesis.com>
0041  * copyright : not copyrighted - public domain
0042  *
0043  * This program uses Xerces-C++ SAX2 parser to load a set of schema files
0044  * and then to validate a set of XML documents against these schemas. To
0045  * build this program you will need Xerces-C++ 3.0.0 or later. For more
0046  * information, see:
0047  *
0048  * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/
0049  */
0050 
0051 /**
0052  * Error handler object used during xml schema validation.
0053  */
0054 class CustomErrorHandler : public ErrorHandler
0055 {
0056 public:
0057     /**
0058      * Constructor
0059      * @param messages Pointer to the error message string to fill.
0060      */
0061     CustomErrorHandler(QString *messages)
0062         : m_messages(messages)
0063     {
0064     }
0065 
0066     /**
0067      * Check global success/fail state.
0068      * @return True if there was a failure, false otherwise.
0069      */
0070     bool failed() const
0071     {
0072         return m_failed;
0073     }
0074 
0075 private:
0076     /**
0077      * Severity classes for error messages.
0078      */
0079     enum severity { s_warning, s_error, s_fatal };
0080 
0081     /**
0082      * Wrapper for warning exceptions.
0083      * @param e Exception to handle.
0084      */
0085     void warning(const SAXParseException &e) override
0086     {
0087         m_failed = true; // be strict, warnings are evil, too!
0088         handle(e, s_warning);
0089     }
0090 
0091     /**
0092      * Wrapper for error exceptions.
0093      * @param e Exception to handle.
0094      */
0095     void error(const SAXParseException &e) override
0096     {
0097         m_failed = true;
0098         handle(e, s_error);
0099     }
0100 
0101     /**
0102      * Wrapper for fatal error exceptions.
0103      * @param e Exception to handle.
0104      */
0105     void fatalError(const SAXParseException &e) override
0106     {
0107         m_failed = true;
0108         handle(e, s_fatal);
0109     }
0110 
0111     /**
0112      * Reset the error status to "no error".
0113      */
0114     void resetErrors() override
0115     {
0116         m_failed = false;
0117     }
0118 
0119     /**
0120      * Generic handler for error/warning/fatal error message exceptions.
0121      * @param e Exception to handle.
0122      * @param s Enum value encoding the message severtity.
0123      */
0124     void handle(const SAXParseException &e, severity s)
0125     {
0126         // get id to print
0127         const XMLCh *xid(e.getPublicId());
0128         if (!xid)
0129             xid = e.getSystemId();
0130 
0131         m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ")
0132                    << QString::fromUtf16(e.getMessage()) << Qt::endl;
0133     }
0134 
0135 private:
0136     /**
0137      * Storage for created error messages in this handler.
0138      */
0139     QTextStream m_messages;
0140 
0141     /**
0142      * Global error state. True if there was an error, false otherwise.
0143      */
0144     bool m_failed = false;
0145 };
0146 
0147 void init_parser(SAX2XMLReaderImpl &parser)
0148 {
0149     // Commonly useful configuration.
0150     //
0151     parser.setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
0152     parser.setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
0153     parser.setFeature(XMLUni::fgSAX2CoreValidation, true);
0154 
0155     // Enable validation.
0156     //
0157     parser.setFeature(XMLUni::fgXercesSchema, true);
0158     parser.setFeature(XMLUni::fgXercesSchemaFullChecking, true);
0159     parser.setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
0160 
0161     // Use the loaded grammar during parsing.
0162     //
0163     parser.setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true);
0164 
0165     // Don't load schemas from any other source (e.g., from XML document's
0166     // xsi:schemaLocation attributes).
0167     //
0168     parser.setFeature(XMLUni::fgXercesLoadSchema, false);
0169 
0170     // Xerces-C++ 3.1.0 is the first version with working multi import
0171     // support.
0172     //
0173     parser.setFeature(XMLUni::fgXercesHandleMultipleImports, true);
0174 }
0175 
0176 #endif
0177 
0178 #include "../lib/worddelimiters_p.h"
0179 #include "../lib/xml_p.h"
0180 
0181 #include <array>
0182 
0183 using KSyntaxHighlighting::WordDelimiters;
0184 using KSyntaxHighlighting::Xml::attrToBool;
0185 
0186 class HlFilesChecker
0187 {
0188 public:
0189     template<typename T>
0190     void setDefinition(const T &verStr, const QString &filename, const QString &name)
0191     {
0192         m_currentDefinition = &*m_definitions.insert(name, Definition{});
0193         m_currentDefinition->languageName = name;
0194         m_currentDefinition->filename = filename;
0195         m_currentDefinition->kateVersionStr = verStr.toString();
0196         m_currentKeywords = nullptr;
0197         m_currentContext = nullptr;
0198 
0199         const auto idx = verStr.indexOf(QLatin1Char('.'));
0200         if (idx <= 0) {
0201             qWarning() << filename << "invalid kateversion" << verStr;
0202             m_success = false;
0203         } else {
0204             m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()};
0205         }
0206     }
0207 
0208     void processElement(QXmlStreamReader &xml)
0209     {
0210         if (xml.isStartElement()) {
0211             if (m_currentContext) {
0212                 m_currentContext->rules.push_back(Context::Rule{});
0213                 auto &rule = m_currentContext->rules.back();
0214                 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success;
0215                 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True;
0216             } else if (m_currentKeywords) {
0217                 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success;
0218             } else if (xml.name() == QStringLiteral("context")) {
0219                 processContextElement(xml);
0220             } else if (xml.name() == QStringLiteral("list")) {
0221                 processListElement(xml);
0222             } else if (xml.name() == QStringLiteral("keywords")) {
0223                 m_success = m_currentDefinition->parseKeywords(xml) && m_success;
0224             } else if (xml.name() == QStringLiteral("emptyLine")) {
0225                 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success;
0226             } else if (xml.name() == QStringLiteral("itemData")) {
0227                 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success;
0228             }
0229         } else if (xml.isEndElement()) {
0230             if (m_currentContext && xml.name() == QStringLiteral("context")) {
0231                 m_currentContext = nullptr;
0232             } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) {
0233                 m_currentKeywords = nullptr;
0234             }
0235         }
0236     }
0237 
0238     //! Resolve context attribute and include tag
0239     void resolveContexts()
0240     {
0241         QMutableMapIterator<QString, Definition> def(m_definitions);
0242         while (def.hasNext()) {
0243             def.next();
0244             auto &definition = def.value();
0245             auto &contexts = definition.contexts;
0246 
0247             if (contexts.isEmpty()) {
0248                 qWarning() << definition.filename << "has no context";
0249                 m_success = false;
0250                 continue;
0251             }
0252 
0253             auto markAsUsedContext = [](ContextName &contextName) {
0254                 if (!contextName.stay && contextName.context) {
0255                     contextName.context->isOnlyIncluded = false;
0256                 }
0257             };
0258 
0259             QMutableMapIterator<QString, Context> contextIt(contexts);
0260             while (contextIt.hasNext()) {
0261                 contextIt.next();
0262                 auto &context = contextIt.value();
0263                 resolveContextName(definition, context, context.lineEndContext, context.line);
0264                 resolveContextName(definition, context, context.lineEmptyContext, context.line);
0265                 resolveContextName(definition, context, context.fallthroughContext, context.line);
0266                 markAsUsedContext(context.lineEndContext);
0267                 markAsUsedContext(context.lineEmptyContext);
0268                 markAsUsedContext(context.fallthroughContext);
0269                 for (auto &rule : context.rules) {
0270                     rule.parentContext = &context;
0271                     resolveContextName(definition, context, rule.context, rule.line);
0272                     if (rule.type != Context::Rule::Type::IncludeRules) {
0273                         markAsUsedContext(rule.context);
0274                     } else if (rule.includeAttrib == XmlBool::True && rule.context.context) {
0275                         rule.context.context->referencedWithIncludeAttrib = true;
0276                     }
0277                 }
0278             }
0279 
0280             auto *firstContext = &*definition.contexts.find(definition.firstContextName);
0281             firstContext->isOnlyIncluded = false;
0282             definition.firstContext = firstContext;
0283         }
0284 
0285         resolveIncludeRules();
0286     }
0287 
0288     bool check() const
0289     {
0290         bool success = m_success;
0291 
0292         const auto usedContexts = extractUsedContexts();
0293 
0294         QMap<const Definition *, const Definition *> maxVersionByDefinitions;
0295         QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules;
0296 
0297         QMapIterator<QString, Definition> def(m_definitions);
0298         while (def.hasNext()) {
0299             def.next();
0300             const auto &definition = def.value();
0301             const auto &filename = definition.filename;
0302 
0303             auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions);
0304             if (maxDef != &definition) {
0305                 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr
0306                            << ". Please, increase kateversion.";
0307                 success = false;
0308             }
0309 
0310             QSet<ItemDatas::Style> usedAttributeNames;
0311             QSet<ItemDatas::Style> ignoredAttributeNames;
0312             success = checkKeywordsList(definition) && success;
0313             success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success;
0314 
0315             // search for non-existing itemDatas.
0316             const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames;
0317             for (const auto &styleName : invalidNames) {
0318                 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name;
0319                 success = false;
0320             }
0321 
0322             // search for existing itemDatas, but unusable.
0323             const auto ignoredNames = ignoredAttributeNames - usedAttributeNames;
0324             for (const auto &styleName : ignoredNames) {
0325                 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name
0326                            << "is never used. All uses are with lookAhead=true or <IncludeRules/>";
0327                 success = false;
0328             }
0329 
0330             // search for unused itemDatas.
0331             auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames;
0332             unusedNames -= ignoredNames;
0333             for (const auto &styleName : std::as_const(unusedNames)) {
0334                 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name;
0335                 success = false;
0336             }
0337         }
0338 
0339         QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules);
0340         while (unreachableIncludedRuleIt.hasNext()) {
0341             unreachableIncludedRuleIt.next();
0342             IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value();
0343             if (unreachableRulesBy.alwaysUnreachable) {
0344                 auto *rule = unreachableIncludedRuleIt.key();
0345 
0346                 if (!rule->parentContext->isOnlyIncluded) {
0347                     continue;
0348                 }
0349 
0350                 // remove duplicates rules
0351                 QSet<const Context::Rule *> rules;
0352                 auto &unreachableBy = unreachableRulesBy.unreachableBy;
0353                 unreachableBy.erase(std::remove_if(unreachableBy.begin(),
0354                                                    unreachableBy.end(),
0355                                                    [&](const RuleAndInclude &ruleAndInclude) {
0356                                                        if (rules.contains(ruleAndInclude.rule)) {
0357                                                            return true;
0358                                                        }
0359                                                        rules.insert(ruleAndInclude.rule);
0360                                                        return false;
0361                                                    }),
0362                                     unreachableBy.end());
0363 
0364                 QString message;
0365                 message.reserve(128);
0366                 for (auto &ruleAndInclude : std::as_const(unreachableBy)) {
0367                     message += QStringLiteral("line ");
0368                     message += QString::number(ruleAndInclude.rule->line);
0369                     message += QStringLiteral(" [");
0370                     message += ruleAndInclude.rule->parentContext->name;
0371                     if (rule->filename != ruleAndInclude.rule->filename) {
0372                         message += QStringLiteral(" (");
0373                         message += ruleAndInclude.rule->filename;
0374                         message += QLatin1Char(')');
0375                     }
0376                     if (ruleAndInclude.includeRules) {
0377                         message += QStringLiteral(" via line ");
0378                         message += QString::number(ruleAndInclude.includeRules->line);
0379                     }
0380                     message += QStringLiteral("], ");
0381                 }
0382                 message.chop(2);
0383 
0384                 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message;
0385                 success = false;
0386             }
0387         }
0388 
0389         return success;
0390     }
0391 
0392 private:
    //! Tri-state value of a boolean XML attribute.
    //! \c Unspecified is the first enumerator, so a value-initialized
    //! \c XmlBool (e.g. <tt>XmlBool lookAhead{}</tt>) is \c Unspecified.
    enum class XmlBool {
        Unspecified,
        False,
        True,
    };
0398 
0399     struct Context;
0400 
    //! A reference to a context as written in a context-like attribute,
    //! together with its resolved target.
    struct ContextName {
        //! name of the referenced context
        QString name;
        //! NOTE(review): presumably the number of leading "#pop" of the attribute - confirm against resolveContextName()
        int popCount = 0;
        //! when set, resolveContexts() does not mark the target as used
        bool stay = false;

        //! resolved target, \c nullptr as long as it is unresolved
        Context *context = nullptr;
    };
0408 
0409     struct Parser {
0410         const QString &filename;
0411         QXmlStreamReader &xml;
0412         QXmlStreamAttribute &attr;
0413         bool success;
0414 
0415         //! Read a string type attribute, \c success = \c false when \p str is not empty
0416         //! \return \c true when attr.name() == attrName, otherwise false
0417         bool extractString(QString &str, const QString &attrName)
0418         {
0419             if (attr.name() != attrName) {
0420                 return false;
0421             }
0422 
0423             str = attr.value().toString();
0424             if (str.isEmpty()) {
0425                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty";
0426                 success = false;
0427             }
0428 
0429             return true;
0430         }
0431 
0432         //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified.
0433         //! \return \c true when attr.name() == attrName, otherwise false
0434         bool extractXmlBool(XmlBool &xmlBool, const QString &attrName)
0435         {
0436             if (attr.name() != attrName) {
0437                 return false;
0438             }
0439 
0440             xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False;
0441 
0442             return true;
0443         }
0444 
0445         //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0
0446         //! \return \c true when attr.name() == attrName, otherwise false
0447         bool extractPositive(int &positive, const QString &attrName)
0448         {
0449             if (attr.name() != attrName) {
0450                 return false;
0451             }
0452 
0453             bool ok = true;
0454             positive = attr.value().toInt(&ok);
0455 
0456             if (!ok || positive < 0) {
0457                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value();
0458                 success = false;
0459             }
0460 
0461             return true;
0462         }
0463 
0464         //! Read a color, \c success = \c false when \p color is already greater than or equal to 0
0465         //! \return \c true when attr.name() == attrName, otherwise false
0466         bool checkColor(const QString &attrName)
0467         {
0468             if (attr.name() != attrName) {
0469                 return false;
0470             }
0471 
0472             const auto value = attr.value();
0473             if (value.isEmpty() /*|| QColor(value).isValid()*/) {
0474                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value;
0475                 success = false;
0476             }
0477 
0478             return true;
0479         }
0480 
0481         //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char
0482         //! \return \c true when attr.name() == attrName, otherwise false
0483         bool extractChar(QChar &c, const QString &attrName)
0484         {
0485             if (attr.name() != attrName) {
0486                 return false;
0487             }
0488 
0489             if (attr.value().size() == 1) {
0490                 c = attr.value()[0];
0491             } else {
0492                 c = QLatin1Char('_');
0493                 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value();
0494                 success = false;
0495             }
0496 
0497             return true;
0498         }
0499 
0500         //! \return parsing status when \p isExtracted is \c true, otherwise \c false
0501         bool checkIfExtracted(bool isExtracted)
0502         {
0503             if (isExtracted) {
0504                 return success;
0505             }
0506 
0507             qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name();
0508             return false;
0509         }
0510     };
0511 
0512     struct Keywords {
0513         struct Items {
0514             struct Item {
0515                 QString content;
0516                 int line;
0517 
0518                 friend size_t qHash(const Item &item, size_t seed = 0)
0519                 {
0520                     return qHash(item.content, seed);
0521                 }
0522 
0523                 friend bool operator==(const Item &item0, const Item &item1)
0524                 {
0525                     return item0.content == item1.content;
0526                 }
0527             };
0528 
0529             QList<Item> keywords;
0530             QSet<Item> includes;
0531 
0532             bool parseElement(const QString &filename, QXmlStreamReader &xml)
0533             {
0534                 bool success = true;
0535 
0536                 const int line = xml.lineNumber();
0537                 QString content = xml.readElementText();
0538 
0539                 if (content.isEmpty()) {
0540                     qWarning() << filename << "line" << line << "is empty:" << xml.name();
0541                     success = false;
0542                 }
0543 
0544                 if (xml.name() == QStringLiteral("include")) {
0545                     includes.insert({content, line});
0546                 } else if (xml.name() == QStringLiteral("item")) {
0547                     keywords.append({content, line});
0548                 } else {
0549                     qWarning() << filename << "line" << line << "invalid element:" << xml.name();
0550                     success = false;
0551                 }
0552 
0553                 return success;
0554             }
0555         };
0556 
0557         QString name;
0558         Items items;
0559         int line;
0560 
0561         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0562         {
0563             line = xml.lineNumber();
0564 
0565             bool success = true;
0566             for (auto &attr : xml.attributes()) {
0567                 Parser parser{filename, xml, attr, success};
0568 
0569                 const bool isExtracted = parser.extractString(name, QStringLiteral("name"));
0570 
0571                 success = parser.checkIfExtracted(isExtracted);
0572             }
0573             return success;
0574         }
0575     };
0576 
0577     struct Context {
0578         struct Rule {
            //! Kind of a rule; apart from \c Unknown every enumerator is spelled
            //! like the XML element it is parsed from (see parseElement()).
            //! \c Unknown is the first enumerator and therefore the value of a
            //! value-initialized \c Type.
            enum class Type {
                Unknown,
                AnyChar,
                Detect2Chars,
                DetectChar,
                DetectIdentifier,
                DetectSpaces,
                Float,
                HlCChar,
                HlCHex,
                HlCOct,
                HlCStringChar,
                IncludeRules,
                Int,
                LineContinue,
                RangeDetect,
                RegExpr,
                StringDetect,
                WordDetect,
                keyword,
            };
0600 
0601             Type type{};
0602 
0603             bool isDotRegex = false;
0604             int line = -1;
0605 
0606             // commonAttributes
0607             QString attribute;
0608             ContextName context;
0609             QString beginRegion;
0610             QString endRegion;
0611             int column = -1;
0612             XmlBool lookAhead{};
0613             XmlBool firstNonSpace{};
0614 
0615             // StringDetect, WordDetect, keyword
0616             XmlBool insensitive{};
0617 
0618             // DetectChar, StringDetect, RegExpr, keyword
0619             XmlBool dynamic{};
0620 
0621             // Regex
0622             XmlBool minimal{};
0623 
0624             // IncludeRule
0625             XmlBool includeAttrib{};
0626 
0627             // DetectChar, Detect2Chars, LineContinue, RangeDetect
0628             QChar char0;
0629             // Detect2Chars, RangeDetect
0630             QChar char1;
0631 
0632             // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword
0633             QString string;
0634             // RegExpr without .* as suffix
0635             QString sanitizedString;
0636 
0637             // Float, HlCHex, HlCOct, Int, WordDetect, keyword
0638             QString additionalDeliminator;
0639             QString weakDeliminator;
0640 
0641             // rules included by IncludeRules (without IncludeRule)
0642             QList<const Rule *> includedRules;
0643 
0644             // IncludeRules included by IncludeRules
0645             QSet<const Rule *> includedIncludeRules;
0646 
0647             Context const *parentContext = nullptr;
0648 
0649             QString filename;
0650 
0651             bool parseElement(const QString &filename, QXmlStreamReader &xml)
0652             {
0653                 this->filename = filename;
0654                 line = xml.lineNumber();
0655 
0656                 using Pair = QPair<QString, Type>;
0657                 static const auto pairs = {
0658                     Pair{QStringLiteral("AnyChar"), Type::AnyChar},
0659                     Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars},
0660                     Pair{QStringLiteral("DetectChar"), Type::DetectChar},
0661                     Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier},
0662                     Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces},
0663                     Pair{QStringLiteral("Float"), Type::Float},
0664                     Pair{QStringLiteral("HlCChar"), Type::HlCChar},
0665                     Pair{QStringLiteral("HlCHex"), Type::HlCHex},
0666                     Pair{QStringLiteral("HlCOct"), Type::HlCOct},
0667                     Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar},
0668                     Pair{QStringLiteral("IncludeRules"), Type::IncludeRules},
0669                     Pair{QStringLiteral("Int"), Type::Int},
0670                     Pair{QStringLiteral("LineContinue"), Type::LineContinue},
0671                     Pair{QStringLiteral("RangeDetect"), Type::RangeDetect},
0672                     Pair{QStringLiteral("RegExpr"), Type::RegExpr},
0673                     Pair{QStringLiteral("StringDetect"), Type::StringDetect},
0674                     Pair{QStringLiteral("WordDetect"), Type::WordDetect},
0675                     Pair{QStringLiteral("keyword"), Type::keyword},
0676                 };
0677 
0678                 for (auto pair : pairs) {
0679                     if (xml.name() == pair.first) {
0680                         type = pair.second;
0681                         bool success = parseAttributes(filename, xml);
0682                         success = checkMandoryAttributes(filename, xml) && success;
0683                         if (success && type == Type::RegExpr) {
0684                             // ., (.) followed by *, +, {1} or nothing
0685                             static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)"));
0686                             // remove "(?:" and ")"
0687                             static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))"));
0688                             // remove parentheses on a copy of string
0689                             auto reg = QString(string).replace(removeParentheses, QString());
0690                             isDotRegex = reg.contains(isDot);
0691 
0692                             // Remove .* and .*$ suffix.
0693                             static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$"));
0694                             sanitizedString = string;
0695                             sanitizedString.replace(allSuffix, QString());
0696                             // string is a catch-all, do not sanitize
0697                             if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) {
0698                                 sanitizedString = string;
0699                             }
0700                         }
0701                         return success;
0702                     }
0703                 }
0704 
0705                 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name();
0706                 return false;
0707             }
0708 
0709         private:
0710             bool parseAttributes(const QString &filename, QXmlStreamReader &xml)
0711             {
0712                 bool success = true;
0713 
0714                 for (auto &attr : xml.attributes()) {
0715                     Parser parser{filename, xml, attr, success};
0716 
0717                     // clang-format off
0718                     const bool isExtracted
0719                         = parser.extractString(attribute, QStringLiteral("attribute"))
0720                        || parser.extractString(context.name, QStringLiteral("context"))
0721                        || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead"))
0722                        || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace"))
0723                        || parser.extractString(beginRegion, QStringLiteral("beginRegion"))
0724                        || parser.extractString(endRegion, QStringLiteral("endRegion"))
0725                        || parser.extractPositive(column, QStringLiteral("column"))
0726                        || ((type == Type::RegExpr
0727                          || type == Type::StringDetect
0728                          || type == Type::WordDetect
0729                          || type == Type::keyword
0730                          ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive")))
0731                        || ((type == Type::DetectChar
0732                          || type == Type::RegExpr
0733                          || type == Type::StringDetect
0734                          || type == Type::keyword
0735                          ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic")))
0736                        || ((type == Type::RegExpr)
0737                            && parser.extractXmlBool(minimal, QStringLiteral("minimal")))
0738                        || ((type == Type::DetectChar
0739                          || type == Type::Detect2Chars
0740                          || type == Type::LineContinue
0741                          || type == Type::RangeDetect
0742                          ) && parser.extractChar(char0, QStringLiteral("char")))
0743                        || ((type == Type::Detect2Chars
0744                          || type == Type::RangeDetect
0745                          ) && parser.extractChar(char1, QStringLiteral("char1")))
0746                        || ((type == Type::AnyChar
0747                          || type == Type::RegExpr
0748                          || type == Type::StringDetect
0749                          || type == Type::WordDetect
0750                          || type == Type::keyword
0751                          ) && parser.extractString(string, QStringLiteral("String")))
0752                        || ((type == Type::IncludeRules)
0753                            && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib")))
0754                        || ((type == Type::Float
0755                          || type == Type::HlCHex
0756                          || type == Type::HlCOct
0757                          || type == Type::Int
0758                          || type == Type::keyword
0759                          || type == Type::WordDetect
0760                          ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator"))
0761                             || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator"))))
0762                     ;
0763                     // clang-format on
0764 
0765                     success = parser.checkIfExtracted(isExtracted);
0766 
0767                     if (type == Type::LineContinue && char0 == QLatin1Char('\0')) {
0768                         char0 = QLatin1Char('\\');
0769                     }
0770                 }
0771 
0772                 return success;
0773             }
0774 
0775             bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml)
0776             {
0777                 QString missingAttr;
0778 
0779                 switch (type) {
0780                 case Type::Unknown:
0781                     return false;
0782 
0783                 case Type::AnyChar:
0784                 case Type::RegExpr:
0785                 case Type::StringDetect:
0786                 case Type::WordDetect:
0787                 case Type::keyword:
0788                     missingAttr = string.isEmpty() ? QStringLiteral("String") : QString();
0789                     break;
0790 
0791                 case Type::DetectChar:
0792                     missingAttr = !char0.unicode() ? QStringLiteral("char") : QString();
0793                     break;
0794 
0795                 case Type::Detect2Chars:
0796                 case Type::RangeDetect:
0797                     missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1")
0798                         : !char0.unicode()                             ? QStringLiteral("char")
0799                         : !char1.unicode()                             ? QStringLiteral("char1")
0800                                                                        : QString();
0801                     break;
0802 
0803                 case Type::IncludeRules:
0804                     missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString();
0805                     break;
0806 
0807                 case Type::DetectIdentifier:
0808                 case Type::DetectSpaces:
0809                 case Type::Float:
0810                 case Type::HlCChar:
0811                 case Type::HlCHex:
0812                 case Type::HlCOct:
0813                 case Type::HlCStringChar:
0814                 case Type::Int:
0815                 case Type::LineContinue:
0816                     break;
0817                 }
0818 
0819                 if (!missingAttr.isEmpty()) {
0820                     qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr;
0821                     return false;
0822                 }
0823 
0824                 return true;
0825             }
0826         };
0827 
0828         int line;
0829         // becomes false when a context (except includeRule) refers to it
0830         bool isOnlyIncluded = true;
0831         // becomes true when an includedRule refers to it with includeAttrib=true
0832         bool referencedWithIncludeAttrib = false;
0833         bool hasDynamicRule = false;
0834         QString name;
0835         QString attribute;
0836         ContextName lineEndContext;
0837         ContextName lineEmptyContext;
0838         ContextName fallthroughContext;
0839         QList<Rule> rules;
0840         XmlBool dynamic{};
0841         XmlBool fallthrough{};
0842         XmlBool stopEmptyLineContextSwitchLoop{};
0843 
0844         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0845         {
0846             line = xml.lineNumber();
0847 
0848             bool success = true;
0849 
0850             for (auto &attr : xml.attributes()) {
0851                 Parser parser{filename, xml, attr, success};
0852                 XmlBool noIndentationBasedFolding{};
0853 
0854                 // clang-format off
0855                 const bool isExtracted = parser.extractString(name, QStringLiteral("name"))
0856                     || parser.extractString(attribute, QStringLiteral("attribute"))
0857                     || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext"))
0858                     || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext"))
0859                     || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext"))
0860                     || parser.extractXmlBool(dynamic, QStringLiteral("dynamic"))
0861                     || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough"))
0862                     || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, QStringLiteral("stopEmptyLineContextSwitchLoop"))
0863                     || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding"));
0864                 // clang-format on
0865 
0866                 success = parser.checkIfExtracted(isExtracted);
0867             }
0868 
0869             if (name.isEmpty()) {
0870                 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name";
0871                 success = false;
0872             }
0873 
0874             if (attribute.isEmpty()) {
0875                 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute";
0876                 success = false;
0877             }
0878 
0879             return success;
0880         }
0881     };
0882 
0883     struct Version {
0884         int majorRevision;
0885         int minorRevision;
0886 
0887         Version(int majorRevision = 0, int minorRevision = 0)
0888             : majorRevision(majorRevision)
0889             , minorRevision(minorRevision)
0890         {
0891         }
0892 
0893         bool operator<(const Version &version) const
0894         {
0895             return majorRevision < version.majorRevision || (majorRevision == version.majorRevision && minorRevision < version.minorRevision);
0896         }
0897     };
0898 
0899     struct ItemDatas {
0900         struct Style {
0901             QString name;
0902             int line;
0903 
0904             friend size_t qHash(const Style &style, size_t seed = 0)
0905             {
0906                 return qHash(style.name, seed);
0907             }
0908 
0909             friend bool operator==(const Style &style0, const Style &style1)
0910             {
0911                 return style0.name == style1.name;
0912             }
0913         };
0914 
0915         QSet<Style> styleNames;
0916 
0917         bool parseElement(const QString &filename, QXmlStreamReader &xml)
0918         {
0919             bool success = true;
0920 
0921             QString name;
0922             QString defStyleNum;
0923             XmlBool boolean;
0924 
0925             for (auto &attr : xml.attributes()) {
0926                 Parser parser{filename, xml, attr, success};
0927 
0928                 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum"))
0929                     || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic"))
0930                     || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut"))
0931                     || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color"))
0932                     || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor"))
0933                     || parser.checkColor(QStringLiteral("selBackgroundColor"));
0934 
0935                 success = parser.checkIfExtracted(isExtracted);
0936             }
0937 
0938             if (!name.isEmpty()) {
0939                 const auto len = styleNames.size();
0940                 styleNames.insert({name, int(xml.lineNumber())});
0941                 if (len == styleNames.size()) {
0942                     qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name;
0943                     success = false;
0944                 }
0945             }
0946 
0947             return success;
0948         }
0949     };
0950 
0951     struct Definition {
0952         QMap<QString, Keywords> keywordsList;
0953         QMap<QString, Context> contexts;
0954         ItemDatas itemDatas;
0955         QString firstContextName;
0956         const Context *firstContext = nullptr;
0957         QString filename;
0958         WordDelimiters wordDelimiters;
0959         Version kateVersion{};
0960         QString kateVersionStr;
0961         QString languageName;
0962         QSet<const Definition *> referencedDefinitions;
0963 
0964         // Parse <keywords ...>
0965         bool parseKeywords(QXmlStreamReader &xml)
0966         {
0967             wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator")));
0968             wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator")));
0969             return true;
0970         }
0971     };
0972 
0973     // Parse <context>
0974     void processContextElement(QXmlStreamReader &xml)
0975     {
0976         Context context;
0977         m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success;
0978         if (m_currentDefinition->firstContextName.isEmpty()) {
0979             m_currentDefinition->firstContextName = context.name;
0980         }
0981         if (m_currentDefinition->contexts.contains(context.name)) {
0982             qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name;
0983             m_success = false;
0984         }
0985         m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context);
0986     }
0987 
0988     // Parse <list name="...">
0989     void processListElement(QXmlStreamReader &xml)
0990     {
0991         Keywords keywords;
0992         m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success;
0993         if (m_currentDefinition->keywordsList.contains(keywords.name)) {
0994             qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name;
0995             m_success = false;
0996         }
0997         m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords);
0998     }
0999 
1000     const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const
1001     {
1002         auto it = maxVersionByDefinitions.find(&definition);
1003         if (it != maxVersionByDefinitions.end()) {
1004             return it.value();
1005         } else {
1006             auto it = maxVersionByDefinitions.insert(&definition, &definition);
1007             for (const auto &referencedDef : definition.referencedDefinitions) {
1008                 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions);
1009                 if (it.value()->kateVersion < maxDef->kateVersion) {
1010                     it.value() = maxDef;
1011                 }
1012             }
1013             return it.value();
1014         }
1015     }
1016 
1017     // Initialize the referenced rules (Rule::includedRules)
1018     void resolveIncludeRules()
1019     {
1020         QSet<const Context *> usedContexts;
1021         QList<const Context *> contexts;
1022 
1023         QMutableMapIterator<QString, Definition> def(m_definitions);
1024         while (def.hasNext()) {
1025             def.next();
1026             auto &definition = def.value();
1027             QMutableMapIterator<QString, Context> contextIt(definition.contexts);
1028             while (contextIt.hasNext()) {
1029                 contextIt.next();
1030                 auto &currentContext = contextIt.value();
1031                 for (auto &rule : currentContext.rules) {
1032                     if (rule.type != Context::Rule::Type::IncludeRules) {
1033                         continue;
1034                     }
1035 
1036                     if (rule.context.stay) {
1037                         qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself";
1038                         m_success = false;
1039                         continue;
1040                     }
1041 
1042                     if (rule.context.popCount) {
1043                         qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix";
1044                         m_success = false;
1045                     }
1046 
1047                     if (!rule.context.context) {
1048                         m_success = false;
1049                         continue;
1050                     }
1051 
1052                     // resolve includedRules and includedIncludeRules
1053 
1054                     usedContexts.clear();
1055                     usedContexts.insert(rule.context.context);
1056                     contexts.clear();
1057                     contexts.append(rule.context.context);
1058 
1059                     for (int i = 0; i < contexts.size(); ++i) {
1060                         currentContext.hasDynamicRule = contexts[i]->hasDynamicRule;
1061                         for (const auto &includedRule : contexts[i]->rules) {
1062                             if (includedRule.type != Context::Rule::Type::IncludeRules) {
1063                                 rule.includedRules.append(&includedRule);
1064                             } else if (&rule == &includedRule) {
1065                                 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity";
1066                                 m_success = false;
1067                             } else {
1068                                 rule.includedIncludeRules.insert(&includedRule);
1069 
1070                                 if (includedRule.includedRules.isEmpty()) {
1071                                     const auto *context = includedRule.context.context;
1072                                     if (context && !usedContexts.contains(context)) {
1073                                         contexts.append(context);
1074                                         usedContexts.insert(context);
1075                                     }
1076                                 } else {
1077                                     rule.includedRules.append(includedRule.includedRules);
1078                                 }
1079                             }
1080                         }
1081                     }
1082                 }
1083             }
1084         }
1085     }
1086 
1087     //! Recursively extracts the contexts used from the first context of the definitions.
1088     //! This method detects groups of contexts which are only used among themselves.
1089     QSet<const Context *> extractUsedContexts() const
1090     {
1091         QSet<const Context *> usedContexts;
1092         QList<const Context *> contexts;
1093 
1094         QMapIterator<QString, Definition> def(m_definitions);
1095         while (def.hasNext()) {
1096             def.next();
1097             const auto &definition = def.value();
1098 
1099             if (definition.firstContext) {
1100                 usedContexts.insert(definition.firstContext);
1101                 contexts.clear();
1102                 contexts.append(definition.firstContext);
1103 
1104                 for (int i = 0; i < contexts.size(); ++i) {
1105                     auto appendContext = [&](const Context *context) {
1106                         if (context && !usedContexts.contains(context)) {
1107                             contexts.append(context);
1108                             usedContexts.insert(context);
1109                         }
1110                     };
1111 
1112                     const auto *context = contexts[i];
1113                     appendContext(context->lineEndContext.context);
1114                     appendContext(context->lineEmptyContext.context);
1115                     appendContext(context->fallthroughContext.context);
1116 
1117                     for (auto &rule : context->rules) {
1118                         appendContext(rule.context.context);
1119                     }
1120                 }
1121             }
1122         }
1123 
1124         return usedContexts;
1125     }
1126 
1127     struct RuleAndInclude {
1128         const Context::Rule *rule;
1129         const Context::Rule *includeRules;
1130 
1131         explicit operator bool() const
1132         {
1133             return rule;
1134         }
1135     };
1136 
1137     struct IncludedRuleUnreachableBy {
1138         QList<RuleAndInclude> unreachableBy;
1139         bool alwaysUnreachable = true;
1140     };
1141 
1142     //! Check contexts and rules
1143     bool checkContexts(const Definition &definition,
1144                        QSet<ItemDatas::Style> &usedAttributeNames,
1145                        QSet<ItemDatas::Style> &ignoredAttributeNames,
1146                        const QSet<const Context *> &usedContexts,
1147                        QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1148     {
1149         bool success = true;
1150 
1151         QMapIterator<QString, Context> contextIt(definition.contexts);
1152         while (contextIt.hasNext()) {
1153             contextIt.next();
1154 
1155             const auto &context = contextIt.value();
1156             const auto &filename = definition.filename;
1157 
1158             if (!usedContexts.contains(&context)) {
1159                 qWarning() << filename << "line" << context.line << "unused context:" << context.name;
1160                 success = false;
1161                 continue;
1162             }
1163 
1164             if (context.name.startsWith(QStringLiteral("#pop"))) {
1165                 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name;
1166                 success = false;
1167             }
1168 
1169             if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) {
1170                 usedAttributeNames.insert({context.attribute, context.line});
1171             }
1172 
1173             success = checkContextAttribute(definition, context) && success;
1174             success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success;
1175             success = suggestRuleMerger(definition.filename, context) && success;
1176 
1177             for (const auto &rule : context.rules) {
1178                 if (!rule.attribute.isEmpty()) {
1179                     if (rule.lookAhead != XmlBool::True) {
1180                         usedAttributeNames.insert({rule.attribute, rule.line});
1181                     } else {
1182                         ignoredAttributeNames.insert({rule.attribute, rule.line});
1183                     }
1184                 }
1185                 success = checkLookAhead(rule) && success;
1186                 success = checkStringDetect(rule) && success;
1187                 success = checkKeyword(definition, rule) && success;
1188                 success = checkRegExpr(filename, rule, context) && success;
1189                 success = checkDelimiters(definition, rule) && success;
1190             }
1191         }
1192 
1193         return success;
1194     }
1195 
1196     //! Check that a regular expression in a RegExpr rule:
1197     //! - isValid()
1198     //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1199     //! - dynamic=true but no place holder used?
1200     //! - is not . with lookAhead="1"
1201     //! - is not ^... without column ou firstNonSpace attribute
1202     //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar
1203     //! - has no unused captures
1204     //! - has no unnecessary quantifier with lookAhead
1205     bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const
1206     {
1207         // ignore empty regex because the error is raised during xml parsing
1208         if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) {
1209             const QRegularExpression regexp(rule.string);
1210             if (!checkRegularExpression(rule.filename, regexp, rule.line)) {
1211                 return false;
1212             }
1213 
1214             // dynamic == true and no place holder?
1215             if (rule.dynamic == XmlBool::True) {
1216                 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1217                 if (!rule.string.contains(placeHolder)) {
1218                     qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1219                     return false;
1220                 }
1221             }
1222 
1223             auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string;
1224             if (rule.lookAhead == XmlBool::True) {
1225                 static const QRegularExpression removeAllSuffix(QStringLiteral(
1226                     R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)"));
1227                 reg.replace(removeAllSuffix, QString());
1228             }
1229 
1230             reg.replace(QStringLiteral("{1}"), QString());
1231 
1232             // is DetectSpaces
1233             // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, +
1234             static const QRegularExpression isDetectSpaces(
1235                 QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)"));
1236             if (rule.string.contains(isDetectSpaces)) {
1237                 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1238                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":"
1239                            << rule.string;
1240                 return false;
1241             }
1242 
1243 #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))"
1244 #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])"
1245 
1246             // is RangeDetect
1247             static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:"
1248                                                                    "\\.\\*[?+]?" REG_CHAR "|"
1249                                                                    "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1"
1250                                                                    ")$"));
1251             if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?"))
1252                  || rule.string.contains(QStringLiteral("[^")))
1253                 && reg.contains(isRange)) {
1254                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string;
1255                 return false;
1256             }
1257 
1258             // is AnyChar
1259             static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)"));
1260             if (rule.string.contains(isAnyChar)) {
1261                 auto extra = (reg[0] == QLatin1Char('^') || reg[1] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1262                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra;
1263                 return false;
1264             }
1265 
1266             // is LineContinue
1267             static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$"));
1268             if (reg.contains(isLineContinue)) {
1269                 auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : "";
1270                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra;
1271                 return false;
1272             }
1273 
1274             // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _
1275             static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR));
1276             reg.replace(sanitize1, QStringLiteral("_"));
1277 
1278 #undef REG_CHAR
1279 #undef REG_ESCAPE_CHAR
1280 
1281             // use minimal or lazy operator
1282             static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]"));
1283             static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]"));
1284 
1285             if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy)
1286                 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0)
1287                 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) {
1288                 qWarning() << rule.filename << "line" << rule.line
1289                            << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string;
1290                 return false;
1291             }
1292 
1293             // replace [:...:] with ___
1294             static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])"));
1295             reg.replace(sanitize2, QStringLiteral("___"));
1296 
1297             // replace [ccc...], [special] with ...
1298             static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))"));
1299             reg.replace(sanitize3, QStringLiteral("...\\1"));
1300 
1301             // replace [c] with _
1302             static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])"));
1303             reg.replace(sanitize4, QStringLiteral("_"));
1304 
1305             const int len = reg.size();
1306             // replace [cC] with _
1307             static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])"));
1308             reg = reg.toUpper();
1309             reg.replace(toInsensitive, QString());
1310 
1311             // is StringDetect
1312             // ignore (?:, ) and {n}
1313             static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)"));
1314             if (reg.contains(isStringDetect)) {
1315                 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : "";
1316                 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg
1317                            << ":" << rule.string;
1318                 if (len != reg.size()) {
1319                     qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string;
1320                 }
1321                 return false;
1322             }
1323 
1324             // column="0"
1325             if (rule.column == -1) {
1326                 // ^ without |
1327                 // (^sas*) -> ok
1328                 // (^sa|s*) -> ko
1329                 // (^(sa|s*)) -> ok
1330                 auto first = std::as_const(reg).begin();
1331                 auto last = std::as_const(reg).end();
1332                 int depth = 0;
1333 
1334                 while (QLatin1Char('(') == *first) {
1335                     ++depth;
1336                     ++first;
1337                     if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) {
1338                         first += 2;
1339                     }
1340                 }
1341 
1342                 if (QLatin1Char('^') == *first) {
1343                     const int bolDepth = depth;
1344                     bool replace = true;
1345 
1346                     while (++first != last) {
1347                         if (QLatin1Char('(') == *first) {
1348                             ++depth;
1349                         } else if (QLatin1Char(')') == *first) {
1350                             --depth;
1351                             if (depth < bolDepth) {
1352                                 // (^a)? === (^a|) -> ko
1353                                 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) {
1354                                     replace = false;
1355                                     break;
1356                                 }
1357                             }
1358                         } else if (QLatin1Char('|') == *first) {
1359                             // ignore '|' within subgroup
1360                             if (depth <= bolDepth) {
1361                                 replace = false;
1362                                 break;
1363                             }
1364                         }
1365                     }
1366 
1367                     if (replace) {
1368                         qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string;
1369                         return false;
1370                     }
1371                 }
1372             }
1373 
1374             // add ^ with column=0
1375             if (rule.column == 0 && !rule.isDotRegex) {
1376                 bool hasStartOfLine = false;
1377                 auto first = std::as_const(reg).begin();
1378                 auto last = std::as_const(reg).end();
1379                 for (; first != last; ++first) {
1380                     if (*first == QLatin1Char('^')) {
1381                         hasStartOfLine = true;
1382                         break;
1383                     } else if (*first == QLatin1Char('(')) {
1384                         if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) {
1385                             first += 2;
1386                         }
1387                     } else {
1388                         break;
1389                     }
1390                 }
1391 
1392                 if (!hasStartOfLine) {
1393                     qWarning() << rule.filename << "line" << rule.line
1394                                << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string;
1395                     return false;
1396                 }
1397             }
1398 
1399             bool useCapture = false;
1400 
1401             // detection of unnecessary capture
1402             if (regexp.captureCount()) {
1403                 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) {
1404                     int maxCapture = 9;
1405                     while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) {
1406                         --maxCapture;
1407                     }
1408                     return maxCapture;
1409                 };
1410 
1411                 int maxCaptureUsed = 0;
1412                 // maximal dynamic reference
1413                 if (rule.context.context && !rule.context.stay) {
1414                     for (const auto &nextRule : rule.context.context->rules) {
1415                         if (nextRule.dynamic == XmlBool::True) {
1416                             static const QString cap[]{
1417                                 QStringLiteral("%1"),
1418                                 QStringLiteral("%2"),
1419                                 QStringLiteral("%3"),
1420                                 QStringLiteral("%4"),
1421                                 QStringLiteral("%5"),
1422                                 QStringLiteral("%6"),
1423                                 QStringLiteral("%7"),
1424                                 QStringLiteral("%8"),
1425                                 QStringLiteral("%9"),
1426                             };
1427                             int maxDynamicCapture = maximalCapture(cap, nextRule.string);
1428                             maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture);
1429                         }
1430                     }
1431                 }
1432 
1433                 static const QString num1[]{
1434                     QStringLiteral("\\1"),
1435                     QStringLiteral("\\2"),
1436                     QStringLiteral("\\3"),
1437                     QStringLiteral("\\4"),
1438                     QStringLiteral("\\5"),
1439                     QStringLiteral("\\6"),
1440                     QStringLiteral("\\7"),
1441                     QStringLiteral("\\8"),
1442                     QStringLiteral("\\9"),
1443                 };
1444                 static const QString num2[]{
1445                     QStringLiteral("\\g1"),
1446                     QStringLiteral("\\g2"),
1447                     QStringLiteral("\\g3"),
1448                     QStringLiteral("\\g4"),
1449                     QStringLiteral("\\g5"),
1450                     QStringLiteral("\\g6"),
1451                     QStringLiteral("\\g7"),
1452                     QStringLiteral("\\g8"),
1453                     QStringLiteral("\\g9"),
1454                 };
1455                 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string));
1456 
1457                 const int maxCapture = std::max(maxCaptureUsed, maxBackReference);
1458 
1459                 if (maxCapture && regexp.captureCount() > maxCapture) {
1460                     qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture
1461                                << "are used. Please, replace '(...)' with '(?:...)':" << rule.string;
1462                     return false;
1463                 }
1464 
1465                 useCapture = maxCapture;
1466             }
1467 
1468             if (!useCapture) {
1469                 // is DetectIdentifier
1470                 static const QRegularExpression isDetectIdentifier(
1471                     QStringLiteral(R"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)"));
1472                 if (rule.string.contains(isDetectIdentifier)) {
1473                     qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string;
1474                     return false;
1475                 }
1476             }
1477 
1478             if (rule.isDotRegex) {
1479                 // search next rule with same column or firstNonSpace
1480                 int i = &rule - context.rules.data() + 1;
1481                 const bool hasColumn = (rule.column != -1);
1482                 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True);
1483                 const bool isSpecial = (hasColumn || hasFirstNonSpace);
1484                 for (; i < context.rules.size(); ++i) {
1485                     auto &rule2 = context.rules[i];
1486                     if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) {
1487                         i = context.rules.size();
1488                         break;
1489                     }
1490 
1491                     const bool hasColumn2 = (rule2.column != -1);
1492                     const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True);
1493                     if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column)
1494                         || (hasFirstNonSpace && hasFirstNonSpace2)) {
1495                         break;
1496                     }
1497                 }
1498 
1499                 auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename;
1500                 if (i == context.rules.size()) {
1501                     if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty()
1502                         && rule.endRegion.isEmpty() && !useCapture) {
1503                         qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename
1504                                    << "should be replaced by fallthroughContext:" << rule.string;
1505                     }
1506                 } else {
1507                     auto &nextRule = context.rules[i];
1508                     auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename;
1509                     qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename
1510                                << "because a dot RegExpr is used line" << rule.line << ruleFilename;
1511                 }
1512 
1513                 // unnecessary quantifier
1514                 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)"));
1515                 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)"));
1516                 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2;
1517                 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) {
1518                     qWarning() << rule.filename << "line" << rule.line
1519                                << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string;
1520                     return false;
1521                 }
1522             }
1523         }
1524 
1525         return true;
1526     }
1527 
1528     // Parse and check <emptyLine>
1529     bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml)
1530     {
1531         bool success = true;
1532 
1533         QString pattern;
1534         XmlBool casesensitive{};
1535 
1536         for (auto &attr : xml.attributes()) {
1537             Parser parser{filename, xml, attr, success};
1538 
1539             const bool isExtracted =
1540                 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive"));
1541 
1542             success = parser.checkIfExtracted(isExtracted);
1543         }
1544 
1545         if (pattern.isEmpty()) {
1546             qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr";
1547             success = false;
1548         } else {
1549             success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber());
1550         }
1551 
1552         return success;
1553     }
1554 
1555     //! Check that a regular expression:
1556     //! - isValid()
1557     //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z].
1558     bool checkRegularExpression(const QString &filename, const QRegularExpression &regexp, int line) const
1559     {
1560         const auto pattern = regexp.pattern();
1561 
1562         // validate regexp
1563         if (!regexp.isValid()) {
1564             qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset"
1565                        << regexp.patternErrorOffset();
1566             return false;
1567         }
1568 
1569         // catch possible case typos: [A-z] or [a-Z]
1570         const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z")));
1571         if (azOffset >= 0) {
1572             qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset;
1573             return false;
1574         }
1575 
1576         return true;
1577     }
1578 
1579     //! Check fallthrough and fallthroughContext.
1580     //! Check kateversion for stopEmptyLineContextSwitchLoop.
1581     bool checkContextAttribute(const Definition &definition, const Context &context) const
1582     {
1583         bool success = true;
1584 
1585         if (!context.fallthroughContext.name.isEmpty()) {
1586             const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62};
1587             if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) {
1588                 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context"
1589                            << context.name;
1590                 success = false;
1591             } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) {
1592                 qWarning() << definition.filename << "line" << context.line
1593                            << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context"
1594                            << context.name;
1595                 success = false;
1596             }
1597         }
1598 
1599         if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < Version{5, 103}) {
1600             qWarning() << definition.filename << "line" << context.line
1601                        << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name;
1602             success = false;
1603         }
1604 
1605         return success;
1606     }
1607 
    //! Search for additionalDeliminator/weakDeliminator attributes that have no effect.
1609     bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const
1610     {
1611         if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) {
1612             return true;
1613         }
1614 
1615         bool success = true;
1616 
1617         if (definition.kateVersion < Version{5, 79}) {
1618             qWarning() << definition.filename << "line" << rule.line
1619                        << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion.";
1620             success = false;
1621         }
1622 
1623         for (QChar c : rule.additionalDeliminator) {
1624             if (!definition.wordDelimiters.contains(c)) {
1625                 return success;
1626             }
1627         }
1628 
1629         for (QChar c : rule.weakDeliminator) {
1630             if (definition.wordDelimiters.contains(c)) {
1631                 return success;
1632             }
1633         }
1634 
1635         qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string;
1636         return false;
1637     }
1638 
    //! Check that a keyword rule references an existing keyword list.
1640     bool checkKeyword(const Definition &definition, const Context::Rule &rule) const
1641     {
1642         if (rule.type == Context::Rule::Type::keyword) {
1643             auto it = definition.keywordsList.find(rule.string);
1644             if (it == definition.keywordsList.end()) {
1645                 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string;
1646                 return false;
1647             }
1648         }
1649         return true;
1650     }
1651 
1652     //! Search for rules with lookAhead="true" and context="#stay".
1653     //! This would cause an infinite loop.
1654     bool checkLookAhead(const Context::Rule &rule) const
1655     {
1656         if (rule.lookAhead == XmlBool::True && rule.context.stay) {
1657             qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay";
1658         }
1659         return true;
1660     }
1661 
1662     //! Check that StringDetect contains a placeHolder when dynamic="1"
1663     bool checkStringDetect(const Context::Rule &rule) const
1664     {
1665         if (rule.type == Context::Rule::Type::StringDetect) {
1666             // dynamic == true and no place holder?
1667             if (rule.dynamic == XmlBool::True) {
1668                 static const QRegularExpression placeHolder(QStringLiteral("%\\d+"));
1669                 if (!rule.string.contains(placeHolder)) {
1670                     qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder";
1671                     return false;
1672                 }
1673             }
1674         }
1675         return true;
1676     }
1677 
1678     //! Check \<include> and delimiter in a keyword list
1679     bool checkKeywordsList(const Definition &definition) const
1680     {
1681         bool success = true;
1682 
1683         bool includeNotSupport = (definition.kateVersion < Version{5, 53});
1684         QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList);
1685         while (keywordsIt.hasNext()) {
1686             keywordsIt.next();
1687 
1688             for (const auto &include : keywordsIt.value().items.includes) {
1689                 if (includeNotSupport) {
1690                     qWarning() << definition.filename << "line" << include.line
1691                                << "<include> is only available since version \"5.53\". Please, increase kateversion.";
1692                     success = false;
1693                 }
1694                 success = checkKeywordInclude(definition, include) && success;
1695             }
1696 
1697             // Check that keyword list items do not have deliminator character
1698 #if 0
1699             for (const auto& keyword : keywordsIt.value().items.keywords) {
1700                 for (QChar c : keyword.content) {
1701                     if (definition.wordDelimiters.contains(c)) {
1702                         qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content;
1703                         success = false;
1704                     }
1705                 }
1706             }
1707 #endif
1708         }
1709 
1710         return success;
1711     }
1712 
1713     //! Search for non-existing keyword include.
1714     bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const
1715     {
1716         bool containsKeywordName = true;
1717         int const idx = include.content.indexOf(QStringLiteral("##"));
1718         if (idx == -1) {
1719             auto it = definition.keywordsList.find(include.content);
1720             containsKeywordName = (it != definition.keywordsList.end());
1721         } else {
1722             auto defName = include.content.mid(idx + 2);
1723             auto listName = include.content.left(idx);
1724             auto it = m_definitions.find(defName);
1725             if (it == m_definitions.end()) {
1726                 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content;
1727                 return false;
1728             }
1729             containsKeywordName = it->keywordsList.contains(listName);
1730         }
1731 
1732         if (!containsKeywordName) {
1733             qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content;
1734         }
1735 
1736         return containsKeywordName;
1737     }
1738 
1739     //! Check if a rule is hidden by another
1740     //! - rule hidden by DetectChar or AnyChar
1741     //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar
1742     //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings
1743     //! - duplicate rule (Int, Float, keyword with same String, etc)
1744     //! - Rule hidden by a dot regex
1745     bool checkUreachableRules(const QString &filename,
1746                               const Context &context,
1747                               QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const
1748     {
1749         if (context.isOnlyIncluded) {
1750             return true;
1751         }
1752 
1753         struct Rule4 {
1754             RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr)
1755             {
1756                 auto set = [&](RuleAndInclude &ruleAndInclude) {
1757                     auto old = ruleAndInclude;
1758                     ruleAndInclude = {&rule, includeRules};
1759                     return old;
1760                 };
1761 
1762                 if (rule.firstNonSpace == XmlBool::True) {
1763                     return set(firstNonSpace);
1764                 } else if (rule.column == 0) {
1765                     return set(column0);
1766                 } else if (rule.column > 0) {
1767                     return set(columnGreaterThan0[rule.column]);
1768                 } else {
1769                     return set(normal);
1770                 }
1771             }
1772 
1773         private:
1774             RuleAndInclude normal;
1775             RuleAndInclude column0;
1776             QMap<int, RuleAndInclude> columnGreaterThan0;
1777             RuleAndInclude firstNonSpace;
1778         };
1779 
1780         // Associate QChar with RuleAndInclude
1781         struct CharTable {
1782             /// Search RuleAndInclude associated with @p c.
1783             RuleAndInclude find(QChar c) const
1784             {
1785                 if (c.unicode() < 128) {
1786                     return m_asciiMap[c.unicode()];
1787                 }
1788                 auto it = m_utf8Map.find(c);
1789                 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value();
1790             }
1791 
1792             /// Search RuleAndInclude associated with the characters of @p s.
1793             /// \return an empty QList when at least one character is not found.
1794             QList<RuleAndInclude> find(QStringView s) const
1795             {
1796                 QList<RuleAndInclude> result;
1797 
1798                 for (QChar c : s) {
1799                     if (!find(c)) {
1800                         return result;
1801                     }
1802                 }
1803 
1804                 for (QChar c : s) {
1805                     result.append(find(c));
1806                 }
1807 
1808                 return result;
1809             }
1810 
1811             /// Associates @p c with a rule.
1812             void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1813             {
1814                 if (c.unicode() < 128) {
1815                     m_asciiMap[c.unicode()] = {&rule, includeRule};
1816                 } else {
1817                     m_utf8Map[c] = {&rule, includeRule};
1818                 }
1819             }
1820 
1821             /// Associates each character of @p s with a rule.
1822             void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1823             {
1824                 for (QChar c : s) {
1825                     append(c, rule, includeRule);
1826                 }
1827             }
1828 
1829         private:
1830             RuleAndInclude m_asciiMap[127]{};
1831             QMap<QChar, RuleAndInclude> m_utf8Map;
1832         };
1833 
        // One CharTable per rule placement; CharTableArray selects among them.
        struct Char4Tables {
            // Table that CharTableArray always adds, whatever the rule's attributes.
            CharTable chars;
            // Table added for rules with column == 0.
            CharTable charsColumn0;
            // Tables added for rules with column > 0, keyed by that column.
            QMap<int, CharTable> charsColumnGreaterThan0;
            // Table added for rules with firstNonSpace == XmlBool::True.
            CharTable charsFirstNonSpace;
        };
1840 
1841         // View on Char4Tables members
1842         struct CharTableArray {
1843             // Append Char4Tables members that satisfies firstNonSpace and column.
1844             // Char4Tables::char is always added.
1845             CharTableArray(Char4Tables &tables, const Context::Rule &rule)
1846             {
1847                 if (rule.firstNonSpace == XmlBool::True) {
1848                     appendTable(tables.charsFirstNonSpace);
1849                 }
1850 
1851                 if (rule.column == 0) {
1852                     appendTable(tables.charsColumn0);
1853                 } else if (rule.column > 0) {
1854                     appendTable(tables.charsColumnGreaterThan0[rule.column]);
1855                 }
1856 
1857                 appendTable(tables.chars);
1858             }
1859 
1860             // Removes Char4Tables::chars when the rule contains firstNonSpace or column
1861             void removeNonSpecialWhenSpecial()
1862             {
1863                 if (m_size > 1) {
1864                     --m_size;
1865                 }
1866             }
1867 
1868             /// Search RuleAndInclude associated with @p c.
1869             RuleAndInclude find(QChar c) const
1870             {
1871                 for (int i = 0; i < m_size; ++i) {
1872                     if (auto ruleAndInclude = m_charTables[i]->find(c)) {
1873                         return ruleAndInclude;
1874                     }
1875                 }
1876                 return RuleAndInclude{nullptr, nullptr};
1877             }
1878 
1879             /// Search RuleAndInclude associated with the characters of @p s.
1880             /// \return an empty QList when at least one character is not found.
1881             QList<RuleAndInclude> find(QStringView s) const
1882             {
1883                 for (int i = 0; i < m_size; ++i) {
1884                     auto result = m_charTables[i]->find(s);
1885                     if (result.size()) {
1886                         while (++i < m_size) {
1887                             result.append(m_charTables[i]->find(s));
1888                         }
1889                         return result;
1890                     }
1891                 }
1892                 return QList<RuleAndInclude>();
1893             }
1894 
1895             /// Associates @p c with a rule.
1896             void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1897             {
1898                 for (int i = 0; i < m_size; ++i) {
1899                     m_charTables[i]->append(c, rule, includeRule);
1900                 }
1901             }
1902 
1903             /// Associates each character of @p s with a rule.
1904             void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr)
1905             {
1906                 for (int i = 0; i < m_size; ++i) {
1907                     m_charTables[i]->append(s, rule, includeRule);
1908                 }
1909             }
1910 
1911         private:
1912             void appendTable(CharTable &t)
1913             {
1914                 m_charTables[m_size] = &t;
1915                 ++m_size;
1916             }
1917 
1918             CharTable *m_charTables[3];
1919             int m_size = 0;
1920         };
1921 
        // A rule together with the IncludeRules rule recorded for it (may be null).
        // Kept as a plain aggregate: entries are created with brace initialization.
        struct ObservableRule {
            // The observed rule.
            const Context::Rule *rule;
            // IncludeRules rule associated with this entry, or nullptr.
            const Context::Rule *includeRules;

            // True when this entry is itself the IncludeRules rule it refers to.
            bool hasResolvedIncludeRules() const
            {
                return rule == includeRules;
            }
        };
1931 
1932         // Iterates over all the rules, including those in includedRules
        // Forward cursor over the entries of @p rules that precede @p endRule.
        // Entries of type IncludeRules are never returned themselves: they are
        // either expanded into their includedRules or skipped.
        struct RuleIterator {
            // @p endRule must be an element of @p rules: its index is obtained by
            // pointer arithmetic and marks the (exclusive) end of the iteration.
            RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule)
                : m_end(&endRule - rules.data())
                , m_rules(rules)
            {
            }

            /// \return next rule or nullptr
            const Context::Rule *next()
            {
                // if in includedRules
                if (m_includedRules) {
                    ++m_i2;
                    if (m_i2 != m_includedRules->size()) {
                        return (*m_includedRules)[m_i2];
                    }
                    // included list exhausted: move on to the next top-level entry
                    ++m_i;
                    m_includedRules = nullptr;
                }

                // if is a includedRules
                while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) {
                    // Only entries without a recorded includeRules and with a
                    // non-empty includedRules list are descended into.
                    if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) {
                        m_i2 = 0;
                        m_includedRules = &m_rules[m_i].rule->includedRules;
                        return (*m_includedRules)[m_i2];
                    }
                    ++m_i;
                }

                if (m_i < m_end) {
                    // return the current entry and advance past it
                    ++m_i;
                    return m_rules[m_i - 1].rule;
                }

                return nullptr;
            }

            /// \return current IncludeRules or nullptr
            // NOTE(review): after next() returns a top-level rule, m_i has already
            // been advanced, so m_rules[m_i] is the entry *following* the returned
            // one - confirm that this is the intended entry.
            const Context::Rule *currentIncludeRules() const
            {
                return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules;
            }

        private:
            // index of the current top-level entry in m_rules
            int m_i = 0;
            // index inside *m_includedRules while an included list is traversed
            int m_i2 = 0;
            // index of endRule in m_rules (exclusive end)
            const int m_end;
            const QList<ObservableRule> &m_rules;
            // included list being traversed, or nullptr when at top level
            const QList<const Context::Rule *> *m_includedRules = nullptr;
        };
1984 
1985         // Dot regex container that satisfies firstNonSpace and column.
1986         struct DotRegex {
1987             /// Append a dot regex rule.
1988             void append(const Context::Rule &rule, const Context::Rule *includedRule)
1989             {
1990                 auto array = extractDotRegexes(rule);
1991                 if (array[0]) {
1992                     *array[0] = {&rule, includedRule};
1993                 }
1994                 if (array[1]) {
1995                     *array[1] = {&rule, includedRule};
1996                 }
1997             }
1998 
1999             /// Search dot regex which hides @p rule
2000             RuleAndInclude find(const Context::Rule &rule)
2001             {
2002                 auto array = extractDotRegexes(rule);
2003                 if (array[0]) {
2004                     return *array[0];
2005                 }
2006                 if (array[1]) {
2007                     return *array[1];
2008                 }
2009                 return RuleAndInclude{};
2010             }
2011 
2012         private:
2013             using Array = std::array<RuleAndInclude *, 2>;
2014 
2015             Array extractDotRegexes(const Context::Rule &rule)
2016             {
2017                 Array ret{};
2018 
2019                 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) {
2020                     ret[0] = &dotRegex;
2021                 } else {
2022                     if (rule.firstNonSpace == XmlBool::True) {
2023                         ret[0] = &dotRegexFirstNonSpace;
2024                     }
2025 
2026                     if (rule.column == 0) {
2027                         ret[1] = &dotRegexColumn0;
2028                     } else if (rule.column > 0) {
2029                         ret[1] = &dotRegexColumnGreaterThan0[rule.column];
2030                     }
2031                 }
2032 
2033                 return ret;
2034             }
2035 
2036             RuleAndInclude dotRegex{};
2037             RuleAndInclude dotRegexColumn0{};
2038             QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{};
2039             RuleAndInclude dotRegexFirstNonSpace{};
2040         };
2041 
2042         bool success = true;
2043 
2044         // characters of DetectChar/AnyChar
2045         Char4Tables detectChars;
2046         // characters of dynamic DetectChar
2047         Char4Tables dynamicDetectChars;
2048         // characters of LineContinue
2049         Char4Tables lineContinueChars;
2050 
2051         Rule4 intRule{};
2052         Rule4 floatRule{};
2053         Rule4 hlCCharRule{};
2054         Rule4 hlCOctRule{};
2055         Rule4 hlCHexRule{};
2056         Rule4 hlCStringCharRule{};
2057         Rule4 detectIdentifierRule{};
2058 
2059         // Contains includedRules and included includedRules
2060         QMap<Context const *, RuleAndInclude> includeContexts;
2061 
2062         DotRegex dotRegex;
2063 
2064         QList<ObservableRule> observedRules;
2065         observedRules.reserve(context.rules.size());
2066         for (const Context::Rule &rule : context.rules) {
2067             const Context::Rule *includeRule = nullptr;
2068             if (rule.type == Context::Rule::Type::IncludeRules) {
2069                 auto *context = rule.context.context;
2070                 if (context && context->isOnlyIncluded) {
2071                     includeRule = &rule;
2072                 }
2073             }
2074 
2075             observedRules.push_back({&rule, includeRule});
2076             if (includeRule) {
2077                 for (const Context::Rule *rule2 : rule.includedRules) {
2078                     observedRules.push_back({rule2, includeRule});
2079                 }
2080             }
2081         }
2082 
2083         for (auto &observedRule : observedRules) {
2084             const Context::Rule &rule = *observedRule.rule;
2085             bool isUnreachable = false;
2086             QList<RuleAndInclude> unreachableBy;
2087 
2088             // declare rule as unreachable if ruleAndInclude is not empty
2089             auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) {
2090                 if (ruleAndInclude) {
2091                     isUnreachable = true;
2092                     unreachableBy.append(ruleAndInclude);
2093                 }
2094             };
2095 
2096             // declare rule as unreachable if ruleAndIncludes is not empty
2097             auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) {
2098                 if (!ruleAndIncludes.isEmpty()) {
2099                     isUnreachable = true;
2100                     unreachableBy.append(ruleAndIncludes);
2101                 }
2102             };
2103 
2104             // check if rule2.firstNonSpace/column is compatible with those of rule
2105             auto isCompatible = [&rule](Context::Rule const &rule2) {
2106                 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1)
2107                     || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True);
2108             };
2109 
2110             updateUnreachable1(dotRegex.find(rule));
2111 
2112             switch (rule.type) {
2113             // checks if hidden by DetectChar/AnyChar
2114             // then add the characters to detectChars
2115             case Context::Rule::Type::AnyChar: {
2116                 auto tables = CharTableArray(detectChars, rule);
2117                 updateUnreachable2(tables.find(rule.string));
2118                 tables.removeNonSpecialWhenSpecial();
2119                 tables.append(rule.string, rule);
2120                 break;
2121             }
2122 
2123             // check if is hidden by DetectChar/AnyChar
2124             // then add the characters to detectChars or dynamicDetectChars
2125             case Context::Rule::Type::DetectChar: {
2126                 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2127                 auto tables = CharTableArray(chars4, rule);
2128                 updateUnreachable1(tables.find(rule.char0));
2129                 tables.removeNonSpecialWhenSpecial();
2130                 tables.append(rule.char0, rule);
2131                 break;
2132             }
2133 
2134             // check if hidden by DetectChar/AnyChar
2135             // then add spaces characters to detectChars
2136             case Context::Rule::Type::DetectSpaces: {
2137                 auto tables = CharTableArray(detectChars, rule);
2138                 updateUnreachable2(tables.find(QStringLiteral(" \t")));
2139                 tables.removeNonSpecialWhenSpecial();
2140                 tables.append(QLatin1Char(' '), rule);
2141                 tables.append(QLatin1Char('\t'), rule);
2142                 break;
2143             }
2144 
2145             // check if hidden by DetectChar/AnyChar
2146             case Context::Rule::Type::HlCChar:
2147                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\'')));
2148                 updateUnreachable1(hlCCharRule.setRule(rule));
2149                 break;
2150 
2151             // check if hidden by DetectChar/AnyChar
2152             case Context::Rule::Type::HlCHex:
2153                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2154                 updateUnreachable1(hlCHexRule.setRule(rule));
2155                 break;
2156 
2157             // check if hidden by DetectChar/AnyChar
2158             case Context::Rule::Type::HlCOct:
2159                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0')));
2160                 updateUnreachable1(hlCOctRule.setRule(rule));
2161                 break;
2162 
2163             // check if hidden by DetectChar/AnyChar
2164             case Context::Rule::Type::HlCStringChar:
2165                 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\')));
2166                 updateUnreachable1(hlCStringCharRule.setRule(rule));
2167                 break;
2168 
2169             // check if hidden by DetectChar/AnyChar
2170             case Context::Rule::Type::Int:
2171                 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789")));
2172                 updateUnreachable1(intRule.setRule(rule));
2173                 break;
2174 
2175             // check if hidden by DetectChar/AnyChar
2176             case Context::Rule::Type::Float:
2177                 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789.")));
2178                 updateUnreachable1(floatRule.setRule(rule));
2179                 // check that Float is before Int
2180                 updateUnreachable1(Rule4(intRule).setRule(rule));
2181                 break;
2182 
2183             // check if hidden by another DetectIdentifier rule
2184             case Context::Rule::Type::DetectIdentifier:
2185                 updateUnreachable1(detectIdentifierRule.setRule(rule));
2186                 break;
2187 
2188             // check if hidden by DetectChar/AnyChar or another LineContinue
2189             case Context::Rule::Type::LineContinue: {
2190                 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2191 
2192                 auto tables = CharTableArray(lineContinueChars, rule);
2193                 updateUnreachable1(tables.find(rule.char0));
2194                 tables.removeNonSpecialWhenSpecial();
2195                 tables.append(rule.char0, rule);
2196                 break;
2197             }
2198 
2199             // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect
2200             case Context::Rule::Type::Detect2Chars:
2201             case Context::Rule::Type::RangeDetect:
2202                 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0));
2203                 if (!isUnreachable) {
2204                     RuleIterator ruleIterator(observedRules, observedRule);
2205                     while (const auto *rulePtr = ruleIterator.next()) {
2206                         if (isUnreachable) {
2207                             break;
2208                         }
2209                         const auto &rule2 = *rulePtr;
2210                         if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) {
2211                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2212                         }
2213                     }
2214                 }
2215                 break;
2216 
2217             case Context::Rule::Type::RegExpr: {
2218                 if (rule.isDotRegex) {
2219                     dotRegex.append(rule, nullptr);
2220                     break;
2221                 }
2222 
2223                 // check that `rule` does not have another RegExpr as a prefix
2224                 RuleIterator ruleIterator(observedRules, observedRule);
2225                 while (const auto *rulePtr = ruleIterator.next()) {
2226                     if (isUnreachable) {
2227                         break;
2228                     }
2229                     const auto &rule2 = *rulePtr;
2230                     if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive
2231                         && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) {
2232                         bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2);
2233                         if (!add) {
2234                             // \s.* (sanitized = \s) is considered hiding \s*\S
2235                             // we check the quantifiers to see if this is the case
2236                             auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode();
2237                             auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode();
2238                             auto c3 = rule2.sanitizedString.back().unicode();
2239                             if (c3 == '*' || c3 == '?' || c3 == '+') {
2240                                 add = true;
2241                             } else if (c1 == '*' || c1 == '?') {
2242                                 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3));
2243                             } else {
2244                                 add = true;
2245                             }
2246                         }
2247                         if (add) {
2248                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2249                         }
2250                     }
2251                 }
2252 
2253                 Q_FALLTHROUGH();
2254             }
2255             // check if a rule does not have another rule as a prefix
2256             case Context::Rule::Type::WordDetect:
2257             case Context::Rule::Type::StringDetect: {
2258                 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix
2259                 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) {
2260                     RuleIterator ruleIterator(observedRules, observedRule);
2261                     while (const auto *rulePtr = ruleIterator.next()) {
2262                         if (isUnreachable) {
2263                             break;
2264                         }
2265 
2266                         const auto &rule2 = *rulePtr;
2267                         if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) {
2268                             continue;
2269                         }
2270 
2271                         const bool isSensitive = (rule2.insensitive == XmlBool::True);
2272                         const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2273                         if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) {
2274                             updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2275                         }
2276                     }
2277                 }
2278 
2279                 // string used for comparison and truncated from "dynamic" part
2280                 QStringView s = rule.string;
2281 
2282                 // truncate to '%' with dynamic rules
2283                 if (rule.dynamic == XmlBool::True) {
2284                     static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)"));
2285                     auto result = dynamicPosition.match(rule.string);
2286                     s = s.left(result.capturedLength());
2287                 }
2288 
2289                 QString sanitizedRegex;
2290                 // truncate to special character with RegExpr.
2291                 // If regexp contains '|', `s` becomes empty.
2292                 if (rule.type == Context::Rule::Type::RegExpr) {
2293                     static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)"));
2294                     static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])"));
2295                     const qsizetype result = regularChars.match(rule.string).capturedLength();
2296                     const qsizetype pos = qMin(result, s.size());
2297                     if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) {
2298                         sanitizedRegex = rule.string.left(qMin(result, s.size()));
2299                         sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1"));
2300                         s = sanitizedRegex;
2301                     } else {
2302                         s = QStringView();
2303                     }
2304                 }
2305 
2306                 // check if hidden by DetectChar/AnyChar
2307                 if (s.size() > 0) {
2308                     auto t = CharTableArray(detectChars, rule);
2309                     if (rule.insensitive != XmlBool::True) {
2310                         updateUnreachable1(t.find(s[0]));
2311                     } else {
2312                         QChar c2[]{s[0].toLower(), s[0].toUpper()};
2313                         updateUnreachable2(t.find(QStringView(c2, 2)));
2314                     }
2315                 }
2316 
2317                 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s
2318                 if (s.size() > 0 && !isUnreachable) {
2319                     // combination of uppercase and lowercase
2320                     RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}};
2321 
2322                     RuleIterator ruleIterator(observedRules, observedRule);
2323                     while (const auto *rulePtr = ruleIterator.next()) {
2324                         if (isUnreachable) {
2325                             break;
2326                         }
2327                         const auto &rule2 = *rulePtr;
2328                         const bool isSensitive = (rule2.insensitive == XmlBool::True);
2329                         const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive;
2330 
2331                         switch (rule2.type) {
2332                         // check that it is not a detectChars prefix
2333                         case Context::Rule::Type::Detect2Chars:
2334                             if (isCompatible(rule2) && s.size() >= 2) {
2335                                 if (rule.insensitive != XmlBool::True) {
2336                                     if (rule2.char0 == s[0] && rule2.char1 == s[1]) {
2337                                         updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2338                                     }
2339                                 } else {
2340                                     // when the string is case insensitive,
2341                                     // all 4 upper/lower case combinations must be found
2342                                     auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) {
2343                                         if (!x && rule2.char0 == c1 && rule2.char0 == c2) {
2344                                             x = {&rule2, ruleIterator.currentIncludeRules()};
2345                                         }
2346                                     };
2347                                     set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower());
2348                                     set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper());
2349                                     set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper());
2350                                     set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower());
2351 
2352                                     if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2]
2353                                         && detect2CharsInsensitives[3]) {
2354                                         isUnreachable = true;
2355                                         unreachableBy.append(detect2CharsInsensitives[0]);
2356                                         unreachableBy.append(detect2CharsInsensitives[1]);
2357                                         unreachableBy.append(detect2CharsInsensitives[2]);
2358                                         unreachableBy.append(detect2CharsInsensitives[3]);
2359                                     }
2360                                 }
2361                             }
2362                             break;
2363 
2364                         // check that it is not a StringDetect prefix
2365                         case Context::Rule::Type::StringDetect:
2366                             if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True)
2367                                 && s.startsWith(rule2.string, caseSensitivity)) {
2368                                 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2369                             }
2370                             break;
2371 
2372                         // check if a WordDetect is hidden by another WordDetect
2373                         case Context::Rule::Type::WordDetect:
2374                             if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True)
2375                                 && 0 == rule.string.compare(rule2.string, caseSensitivity)) {
2376                                 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2377                             }
2378                             break;
2379 
2380                         default:;
2381                         }
2382                     }
2383                 }
2384 
2385                 break;
2386             }
2387 
2388             // check if hidden by another keyword rule
2389             case Context::Rule::Type::keyword: {
2390                 RuleIterator ruleIterator(observedRules, observedRule);
2391                 while (const auto *rulePtr = ruleIterator.next()) {
2392                     if (isUnreachable) {
2393                         break;
2394                     }
2395                     const auto &rule2 = *rulePtr;
2396                     if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) {
2397                         updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()});
2398                     }
2399                 }
2400                 // TODO check that all keywords are hidden by another rules
2401                 break;
2402             }
2403 
2404             // add characters in those used but without checking if they are already.
2405             //  <DetectChar char="}" />
2406             //  <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked
2407             //  <includedRules .../> <- reference a <DetectChar char="{" /> who will be added
2408             //  <DetectChar char="{" /> <- hidden by previous rule
2409             case Context::Rule::Type::IncludeRules:
2410                 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2411                     break;
2412                 }
2413 
2414                 if (auto &ruleAndInclude = includeContexts[rule.context.context]) {
2415                     updateUnreachable1(ruleAndInclude);
2416                 } else {
2417                     ruleAndInclude.rule = &rule;
2418                 }
2419 
2420                 for (const auto *rulePtr : rule.includedIncludeRules) {
2421                     includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule});
2422                 }
2423 
2424                 if (observedRule.includeRules) {
2425                     break;
2426                 }
2427 
2428                 for (const auto *rulePtr : rule.includedRules) {
2429                     const auto &rule2 = *rulePtr;
2430                     switch (rule2.type) {
2431                     case Context::Rule::Type::AnyChar: {
2432                         auto tables = CharTableArray(detectChars, rule2);
2433                         tables.removeNonSpecialWhenSpecial();
2434                         tables.append(rule2.string, rule2, &rule);
2435                         break;
2436                     }
2437 
2438                     case Context::Rule::Type::DetectChar: {
2439                         auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars;
2440                         auto tables = CharTableArray(chars4, rule2);
2441                         tables.removeNonSpecialWhenSpecial();
2442                         tables.append(rule2.char0, rule2, &rule);
2443                         break;
2444                     }
2445 
2446                     case Context::Rule::Type::DetectSpaces: {
2447                         auto tables = CharTableArray(detectChars, rule2);
2448                         tables.removeNonSpecialWhenSpecial();
2449                         tables.append(QLatin1Char(' '), rule2, &rule);
2450                         tables.append(QLatin1Char('\t'), rule2, &rule);
2451                         break;
2452                     }
2453 
2454                     case Context::Rule::Type::HlCChar:
2455                         hlCCharRule.setRule(rule2, &rule);
2456                         break;
2457 
2458                     case Context::Rule::Type::HlCHex:
2459                         hlCHexRule.setRule(rule2, &rule);
2460                         break;
2461 
2462                     case Context::Rule::Type::HlCOct:
2463                         hlCOctRule.setRule(rule2, &rule);
2464                         break;
2465 
2466                     case Context::Rule::Type::HlCStringChar:
2467                         hlCStringCharRule.setRule(rule2, &rule);
2468                         break;
2469 
2470                     case Context::Rule::Type::Int:
2471                         intRule.setRule(rule2, &rule);
2472                         break;
2473 
2474                     case Context::Rule::Type::Float:
2475                         floatRule.setRule(rule2, &rule);
2476                         break;
2477 
2478                     case Context::Rule::Type::LineContinue: {
2479                         auto tables = CharTableArray(lineContinueChars, rule2);
2480                         tables.removeNonSpecialWhenSpecial();
2481                         tables.append(rule2.char0, rule2, &rule);
2482                         break;
2483                     }
2484 
2485                     case Context::Rule::Type::RegExpr:
2486                         if (rule2.isDotRegex) {
2487                             dotRegex.append(rule2, &rule);
2488                         }
2489                         break;
2490 
2491                     case Context::Rule::Type::WordDetect:
2492                     case Context::Rule::Type::StringDetect:
2493                     case Context::Rule::Type::Detect2Chars:
2494                     case Context::Rule::Type::IncludeRules:
2495                     case Context::Rule::Type::DetectIdentifier:
2496                     case Context::Rule::Type::keyword:
2497                     case Context::Rule::Type::Unknown:
2498                     case Context::Rule::Type::RangeDetect:
2499                         break;
2500                     }
2501                 }
2502                 break;
2503 
2504             case Context::Rule::Type::Unknown:
2505                 break;
2506             }
2507 
2508             if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) {
2509                 auto &unreachableIncludedRule = unreachableIncludedRules[&rule];
2510                 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) {
2511                     unreachableIncludedRule.unreachableBy.append(unreachableBy);
2512                 } else {
2513                     unreachableIncludedRule.alwaysUnreachable = false;
2514                 }
2515             } else if (isUnreachable) {
2516                 success = false;
2517                 QString message;
2518                 message.reserve(128);
2519                 for (auto &ruleAndInclude : unreachableBy) {
2520                     message += QStringLiteral("line ");
2521                     if (ruleAndInclude.includeRules) {
2522                         message += QString::number(ruleAndInclude.includeRules->line);
2523                         message += QStringLiteral(" [by '");
2524                         message += ruleAndInclude.includeRules->context.name;
2525                         message += QStringLiteral("' line ");
2526                         message += QString::number(ruleAndInclude.rule->line);
2527                         if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) {
2528                             message += QStringLiteral(" (");
2529                             message += ruleAndInclude.rule->filename;
2530                             message += QLatin1Char(')');
2531                         }
2532                         message += QLatin1Char(']');
2533                     } else {
2534                         message += QString::number(ruleAndInclude.rule->line);
2535                     }
2536                     message += QStringLiteral(", ");
2537                 }
2538                 message.chop(2);
2539                 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message;
2540             }
2541         }
2542 
2543         return success;
2544     }
2545 
2546     //! Proposes to merge certain rule sequences
2547     //! - several DetectChar/AnyChar into AnyChar
2548     //! - several RegExpr into one RegExpr
2549     bool suggestRuleMerger(const QString &filename, const Context &context) const
2550     {
2551         bool success = true;
2552 
2553         if (context.rules.isEmpty()) {
2554             return success;
2555         }
2556 
2557         auto it = context.rules.begin();
2558         const auto end = context.rules.end() - 1;
2559 
2560         for (; it < end; ++it) {
2561             auto &rule1 = *it;
2562             auto &rule2 = it[1];
2563 
2564             auto isCommonCompatible = [&] {
2565                 if (rule1.lookAhead != rule2.lookAhead) {
2566                     return false;
2567                 }
2568                 // ignore attribute when lookAhead is true
2569                 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) {
2570                     return false;
2571                 }
2572                 // clang-format off
2573                 return rule1.beginRegion == rule2.beginRegion
2574                     && rule1.endRegion == rule2.endRegion
2575                     && rule1.firstNonSpace == rule2.firstNonSpace
2576                     && rule1.context.context == rule2.context.context
2577                     && rule1.context.popCount == rule2.context.popCount;
2578                 // clang-format on
2579             };
2580 
2581             switch (rule1.type) {
2582             // request to merge AnyChar/DetectChar
2583             case Context::Rule::Type::AnyChar:
2584             case Context::Rule::Type::DetectChar:
2585                 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible()
2586                     && rule1.column == rule2.column) {
2587                     qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule";
2588                     success = false;
2589                 }
2590                 break;
2591 
2592             // request to merge multiple RegExpr
2593             case Context::Rule::Type::RegExpr:
2594                 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic
2595                     && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) {
2596                     qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule";
2597                     success = false;
2598                 }
2599                 break;
2600 
2601             case Context::Rule::Type::DetectSpaces:
2602             case Context::Rule::Type::HlCChar:
2603             case Context::Rule::Type::HlCHex:
2604             case Context::Rule::Type::HlCOct:
2605             case Context::Rule::Type::HlCStringChar:
2606             case Context::Rule::Type::Int:
2607             case Context::Rule::Type::Float:
2608             case Context::Rule::Type::LineContinue:
2609             case Context::Rule::Type::WordDetect:
2610             case Context::Rule::Type::StringDetect:
2611             case Context::Rule::Type::Detect2Chars:
2612             case Context::Rule::Type::IncludeRules:
2613             case Context::Rule::Type::DetectIdentifier:
2614             case Context::Rule::Type::keyword:
2615             case Context::Rule::Type::Unknown:
2616             case Context::Rule::Type::RangeDetect:
2617                 break;
2618             }
2619         }
2620 
2621         return success;
2622     }
2623 
2624     //! Initialize the referenced context (ContextName::context)
2625     //! Some input / output examples are:
2626     //! - "#stay"         -> ""
2627     //! - "#pop"          -> ""
2628     //! - "Comment"       -> "Comment"
2629     //! - "#pop!Comment"  -> "Comment"
2630     //! - "##ISO C++"     -> ""
2631     //! - "Comment##ISO C++"-> "Comment" in ISO C++
2632     void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line)
2633     {
2634         QStringView name = contextName.name;
2635         if (name.isEmpty()) {
2636             contextName.stay = true;
2637         } else if (name.startsWith(QStringLiteral("#stay"))) {
2638             name = name.mid(5);
2639             contextName.stay = true;
2640             contextName.context = &context;
2641             if (!name.isEmpty()) {
2642                 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name;
2643                 m_success = false;
2644             }
2645         } else {
2646             while (name.startsWith(QStringLiteral("#pop"))) {
2647                 name = name.mid(4);
2648                 ++contextName.popCount;
2649             }
2650 
2651             if (contextName.popCount && !name.isEmpty()) {
2652                 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) {
2653                     name = name.mid(1);
2654                 } else {
2655                     qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name;
2656                     m_success = false;
2657                 }
2658             }
2659 
2660             if (!name.isEmpty()) {
2661                 const int idx = name.indexOf(QStringLiteral("##"));
2662                 if (idx == -1) {
2663                     auto it = definition.contexts.find(name.toString());
2664                     if (it != definition.contexts.end()) {
2665                         contextName.context = &*it;
2666                     }
2667                 } else {
2668                     auto defName = name.mid(idx + 2);
2669                     auto it = m_definitions.find(defName.toString());
2670                     if (it != m_definitions.end()) {
2671                         auto listName = name.left(idx).toString();
2672                         definition.referencedDefinitions.insert(&*it);
2673                         auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName);
2674                         if (ctxIt != it->contexts.end()) {
2675                             contextName.context = &*ctxIt;
2676                         }
2677                     } else {
2678                         qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name;
2679                         m_success = false;
2680                     }
2681                 }
2682 
2683                 if (!contextName.context) {
2684                     qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name;
2685                     m_success = false;
2686                 }
2687             }
2688         }
2689     }
2690 
2691     QMap<QString, Definition> m_definitions;
2692     Definition *m_currentDefinition = nullptr;
2693     Keywords *m_currentKeywords = nullptr;
2694     Context *m_currentContext = nullptr;
2695     bool m_success = true;
2696 };
2697 
2698 namespace
2699 {
2700 QStringList readListing(const QString &fileName)
2701 {
2702     QFile file(fileName);
2703     if (!file.open(QIODevice::ReadOnly)) {
2704         return QStringList();
2705     }
2706 
2707     QXmlStreamReader xml(&file);
2708     QStringList listing;
2709     while (!xml.atEnd()) {
2710         xml.readNext();
2711 
2712         // add only .xml files, no .json or stuff
2713         if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) {
2714             listing.append(xml.text().toString());
2715         }
2716     }
2717 
2718     if (xml.hasError()) {
2719         qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset();
2720         listing.clear();
2721     }
2722 
2723     return listing;
2724 }
2725 
2726 /**
2727  * check if the "extensions" attribute have valid wildcards
2728  * @param extensions extensions string to check
2729  * @return valid?
2730  */
2731 bool checkExtensions(QStringView extensions)
2732 {
2733     // get list of extensions
2734     const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts);
2735 
2736     // ok if empty
2737     if (extensionParts.isEmpty()) {
2738         return true;
2739     }
2740 
2741     // check that only valid wildcard things are inside the parts
2742     for (const auto &extension : extensionParts) {
2743         for (const auto c : extension) {
2744             // eat normal things
2745             if (c.isDigit() || c.isLetter()) {
2746                 continue;
2747             }
2748 
2749             // allow some special characters
2750             if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) {
2751                 continue;
2752             }
2753 
2754             // only allowed wildcard things: '?' and '*'
2755             if (c == QLatin1Char('?') || c == QLatin1Char('*')) {
2756                 continue;
2757             }
2758 
2759             qWarning() << "invalid character" << c << "seen in extensions wildcard";
2760             return false;
2761         }
2762     }
2763 
2764     // all checks passed
2765     return true;
2766 }
2767 
2768 }
2769 
2770 int main(int argc, char *argv[])
2771 {
2772     // get app instance
2773     QCoreApplication app(argc, argv);
2774 
2775     // ensure enough arguments are passed
2776     if (app.arguments().size() < 3) {
2777         return 1;
2778     }
2779 
2780 #ifdef HAS_XERCESC
2781     // care for proper init and cleanup
2782     XMLPlatformUtils::Initialize();
2783     auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate);
2784 
2785     /*
2786      * parse XSD first time and cache it
2787      */
2788     XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager);
2789 
2790     // create parser for the XSD
2791     SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2792     init_parser(parser);
2793     QString messages;
2794     CustomErrorHandler eh(&messages);
2795     parser.setErrorHandler(&eh);
2796 
2797     // load grammar into the pool, on error just abort
2798     const auto xsdFile = app.arguments().at(2);
2799     if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || eh.failed()) {
2800         qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(messages));
2801         return 2;
2802     }
2803 
2804     // lock the pool, no later modifications wanted!
2805     xsd.lockPool();
2806 #endif
2807 
2808     const QString hlFilenamesListing = app.arguments().value(3);
2809     if (hlFilenamesListing.isEmpty()) {
2810         return 1;
2811     }
2812 
2813     QStringList hlFilenames = readListing(hlFilenamesListing);
2814     if (hlFilenames.isEmpty()) {
2815         qWarning("Failed to read %s", qPrintable(hlFilenamesListing));
2816         return 3;
2817     }
2818 
2819     // text attributes
2820     const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("section") << QStringLiteral("mimetype")
2821                                                      << QStringLiteral("extensions") << QStringLiteral("style") << QStringLiteral("author")
2822                                                      << QStringLiteral("license") << QStringLiteral("indenter");
2823 
2824     // index all given highlightings
2825     HlFilesChecker filesChecker;
2826     QVariantMap hls;
2827     int anyError = 0;
2828     for (const QString &hlFilename : std::as_const(hlFilenames)) {
2829         QFile hlFile(hlFilename);
2830         if (!hlFile.open(QIODevice::ReadOnly)) {
2831             qWarning("Failed to open %s", qPrintable(hlFilename));
2832             anyError = 3;
2833             continue;
2834         }
2835 
2836 #ifdef HAS_XERCESC
2837         // create parser
2838         SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd);
2839         init_parser(parser);
2840         QString messages;
2841         CustomErrorHandler eh(&messages);
2842         parser.setErrorHandler(&eh);
2843 
2844         // parse the XML file
2845         parser.parse((const char16_t *)hlFile.fileName().utf16());
2846 
2847         // report issues
2848         if (eh.failed()) {
2849             qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(messages));
2850             anyError = 4;
2851             continue;
2852         }
2853 #endif
2854 
2855         // read the needed attributes from toplevel language tag
2856         hlFile.reset();
2857         QXmlStreamReader xml(&hlFile);
2858         if (xml.readNextStartElement()) {
2859             if (xml.name() != QLatin1String("language")) {
2860                 anyError = 5;
2861                 continue;
2862             }
2863         } else {
2864             anyError = 6;
2865             continue;
2866         }
2867 
2868         // map to store hl info
2869         QVariantMap hl;
2870 
2871         // transfer text attributes
2872         for (const QString &attribute : std::as_const(textAttributes)) {
2873             hl[attribute] = xml.attributes().value(attribute).toString();
2874         }
2875 
2876         // check if extensions have the right format
2877         if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) {
2878             qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString();
2879             anyError = 23;
2880         }
2881 
2882         // numerical attributes
2883         hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt();
2884         hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt();
2885 
2886         // add boolean one
2887         hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden")));
2888 
2889         // keep some strings as UTF-8 for faster translations
2890         hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8();
2891         hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8();
2892 
2893         // remember hl
2894         hls[QFileInfo(hlFile).fileName()] = hl;
2895 
2896         const QString hlName = hl[QStringLiteral("name")].toString();
2897 
2898         filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName);
2899 
2900         // scan for broken regex or keywords with spaces
2901         while (!xml.atEnd()) {
2902             xml.readNext();
2903             filesChecker.processElement(xml);
2904         }
2905 
2906         if (xml.hasError()) {
2907             anyError = 33;
2908             qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset();
2909         }
2910     }
2911 
2912     filesChecker.resolveContexts();
2913 
2914     if (!filesChecker.check()) {
2915         anyError = 7;
2916     }
2917 
2918     // bail out if any problem was seen
2919     if (anyError) {
2920         return anyError;
2921     }
2922 
2923     // create outfile, after all has worked!
2924     QFile outFile(app.arguments().at(1));
2925     if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
2926         return 9;
2927     }
2928 
2929     // write out json
2930     outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor());
2931 
2932     // be done
2933     return 0;
2934 }