File indexing completed on 2024-05-12 04:02:16
0001 /* 0002 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org> 0003 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com> 0004 0005 SPDX-License-Identifier: MIT 0006 */ 0007 0008 #include <QCborValue> 0009 #include <QCoreApplication> 0010 #include <QDebug> 0011 #include <QFile> 0012 #include <QFileInfo> 0013 #include <QMutableMapIterator> 0014 #include <QRegularExpression> 0015 #include <QScopeGuard> 0016 #include <QVariant> 0017 #include <QXmlStreamReader> 0018 0019 #ifdef HAS_XERCESC 0020 0021 #include <xercesc/framework/XMLGrammarPoolImpl.hpp> 0022 0023 #include <xercesc/parsers/SAX2XMLReaderImpl.hpp> 0024 0025 #include <xercesc/sax/ErrorHandler.hpp> 0026 #include <xercesc/sax/SAXParseException.hpp> 0027 0028 #include <xercesc/util/PlatformUtils.hpp> 0029 #include <xercesc/util/XMLString.hpp> 0030 #include <xercesc/util/XMLUni.hpp> 0031 0032 #include <xercesc/framework/XMLGrammarPoolImpl.hpp> 0033 #include <xercesc/validators/common/Grammar.hpp> 0034 0035 using namespace xercesc; 0036 0037 /* 0038 * Ideas taken from: 0039 * 0040 * author : Boris Kolpackov <boris@codesynthesis.com> 0041 * copyright : not copyrighted - public domain 0042 * 0043 * This program uses Xerces-C++ SAX2 parser to load a set of schema files 0044 * and then to validate a set of XML documents against these schemas. To 0045 * build this program you will need Xerces-C++ 3.0.0 or later. For more 0046 * information, see: 0047 * 0048 * http://www.codesynthesis.com/~boris/blog/2010/03/15/validating-external-schemas-xerces-cxx/ 0049 */ 0050 0051 /** 0052 * Error handler object used during xml schema validation. 0053 */ 0054 class CustomErrorHandler : public ErrorHandler 0055 { 0056 public: 0057 /** 0058 * Constructor 0059 * @param messages Pointer to the error message string to fill. 0060 */ 0061 CustomErrorHandler(QString *messages) 0062 : m_messages(messages) 0063 { 0064 } 0065 0066 /** 0067 * Check global success/fail state. 0068 * @return True if there was a failure, false otherwise. 0069 */ 0070 bool failed() const 0071 { 0072 return m_failed; 0073 } 0074 0075 private: 0076 /** 0077 * Severity classes for error messages. 0078 */ 0079 enum severity { s_warning, s_error, s_fatal }; 0080 0081 /** 0082 * Wrapper for warning exceptions. 0083 * @param e Exception to handle. 0084 */ 0085 void warning(const SAXParseException &e) override 0086 { 0087 m_failed = true; // be strict, warnings are evil, too! 0088 handle(e, s_warning); 0089 } 0090 0091 /** 0092 * Wrapper for error exceptions. 0093 * @param e Exception to handle. 0094 */ 0095 void error(const SAXParseException &e) override 0096 { 0097 m_failed = true; 0098 handle(e, s_error); 0099 } 0100 0101 /** 0102 * Wrapper for fatal error exceptions. 0103 * @param e Exception to handle. 0104 */ 0105 void fatalError(const SAXParseException &e) override 0106 { 0107 m_failed = true; 0108 handle(e, s_fatal); 0109 } 0110 0111 /** 0112 * Reset the error status to "no error". 0113 */ 0114 void resetErrors() override 0115 { 0116 m_failed = false; 0117 } 0118 0119 /** 0120 * Generic handler for error/warning/fatal error message exceptions. 0121 * @param e Exception to handle. 0122 * @param s Enum value encoding the message severtity. 0123 */ 0124 void handle(const SAXParseException &e, severity s) 0125 { 0126 // get id to print 0127 const XMLCh *xid(e.getPublicId()); 0128 if (!xid) 0129 xid = e.getSystemId(); 0130 0131 m_messages << QString::fromUtf16(xid) << ":" << e.getLineNumber() << ":" << e.getColumnNumber() << " " << (s == s_warning ? "warning: " : "error: ") 0132 << QString::fromUtf16(e.getMessage()) << Qt::endl; 0133 } 0134 0135 private: 0136 /** 0137 * Storage for created error messages in this handler. 0138 */ 0139 QTextStream m_messages; 0140 0141 /** 0142 * Global error state. True if there was an error, false otherwise. 0143 */ 0144 bool m_failed = false; 0145 }; 0146 0147 void init_parser(SAX2XMLReaderImpl &parser) 0148 { 0149 // Commonly useful configuration. 0150 // 0151 parser.setFeature(XMLUni::fgSAX2CoreNameSpaces, true); 0152 parser.setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true); 0153 parser.setFeature(XMLUni::fgSAX2CoreValidation, true); 0154 0155 // Enable validation. 0156 // 0157 parser.setFeature(XMLUni::fgXercesSchema, true); 0158 parser.setFeature(XMLUni::fgXercesSchemaFullChecking, true); 0159 parser.setFeature(XMLUni::fgXercesValidationErrorAsFatal, true); 0160 0161 // Use the loaded grammar during parsing. 0162 // 0163 parser.setFeature(XMLUni::fgXercesUseCachedGrammarInParse, true); 0164 0165 // Don't load schemas from any other source (e.g., from XML document's 0166 // xsi:schemaLocation attributes). 0167 // 0168 parser.setFeature(XMLUni::fgXercesLoadSchema, false); 0169 0170 // Xerces-C++ 3.1.0 is the first version with working multi import 0171 // support. 0172 // 0173 parser.setFeature(XMLUni::fgXercesHandleMultipleImports, true); 0174 } 0175 0176 #endif 0177 0178 #include "../lib/worddelimiters_p.h" 0179 #include "../lib/xml_p.h" 0180 0181 #include <array> 0182 0183 using KSyntaxHighlighting::WordDelimiters; 0184 using KSyntaxHighlighting::Xml::attrToBool; 0185 0186 class HlFilesChecker 0187 { 0188 public: 0189 template<typename T> 0190 void setDefinition(const T &verStr, const QString &filename, const QString &name) 0191 { 0192 m_currentDefinition = &*m_definitions.insert(name, Definition{}); 0193 m_currentDefinition->languageName = name; 0194 m_currentDefinition->filename = filename; 0195 m_currentDefinition->kateVersionStr = verStr.toString(); 0196 m_currentKeywords = nullptr; 0197 m_currentContext = nullptr; 0198 0199 const auto idx = verStr.indexOf(QLatin1Char('.')); 0200 if (idx <= 0) { 0201 qWarning() << filename << "invalid kateversion" << verStr; 0202 m_success = false; 0203 } else { 0204 m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()}; 0205 } 0206 } 0207 0208 void processElement(QXmlStreamReader &xml) 0209 { 0210 if (xml.isStartElement()) { 0211 if (m_currentContext) { 0212 m_currentContext->rules.push_back(Context::Rule{}); 0213 auto &rule = m_currentContext->rules.back(); 0214 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success; 0215 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True; 0216 } else if (m_currentKeywords) { 0217 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success; 0218 } else if (xml.name() == QStringLiteral("context")) { 0219 processContextElement(xml); 0220 } else if (xml.name() == QStringLiteral("list")) { 0221 processListElement(xml); 0222 } else if (xml.name() == QStringLiteral("keywords")) { 0223 m_success = m_currentDefinition->parseKeywords(xml) && m_success; 0224 } else if (xml.name() == QStringLiteral("emptyLine")) { 0225 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success; 0226 } else if (xml.name() == QStringLiteral("itemData")) { 0227 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success; 0228 } 0229 } else if (xml.isEndElement()) { 0230 if (m_currentContext && xml.name() == QStringLiteral("context")) { 0231 m_currentContext = nullptr; 0232 } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) { 0233 m_currentKeywords = nullptr; 0234 } 0235 } 0236 } 0237 0238 //! Resolve context attribute and include tag 0239 void resolveContexts() 0240 { 0241 QMutableMapIterator<QString, Definition> def(m_definitions); 0242 while (def.hasNext()) { 0243 def.next(); 0244 auto &definition = def.value(); 0245 auto &contexts = definition.contexts; 0246 0247 if (contexts.isEmpty()) { 0248 qWarning() << definition.filename << "has no context"; 0249 m_success = false; 0250 continue; 0251 } 0252 0253 auto markAsUsedContext = [](ContextName &contextName) { 0254 if (!contextName.stay && contextName.context) { 0255 contextName.context->isOnlyIncluded = false; 0256 } 0257 }; 0258 0259 QMutableMapIterator<QString, Context> contextIt(contexts); 0260 while (contextIt.hasNext()) { 0261 contextIt.next(); 0262 auto &context = contextIt.value(); 0263 resolveContextName(definition, context, context.lineEndContext, context.line); 0264 resolveContextName(definition, context, context.lineEmptyContext, context.line); 0265 resolveContextName(definition, context, context.fallthroughContext, context.line); 0266 markAsUsedContext(context.lineEndContext); 0267 markAsUsedContext(context.lineEmptyContext); 0268 markAsUsedContext(context.fallthroughContext); 0269 for (auto &rule : context.rules) { 0270 rule.parentContext = &context; 0271 resolveContextName(definition, context, rule.context, rule.line); 0272 if (rule.type != Context::Rule::Type::IncludeRules) { 0273 markAsUsedContext(rule.context); 0274 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) { 0275 rule.context.context->referencedWithIncludeAttrib = true; 0276 } 0277 } 0278 } 0279 0280 auto *firstContext = &*definition.contexts.find(definition.firstContextName); 0281 firstContext->isOnlyIncluded = false; 0282 definition.firstContext = firstContext; 0283 } 0284 0285 resolveIncludeRules(); 0286 } 0287 0288 bool check() const 0289 { 0290 bool success = m_success; 0291 0292 const auto usedContexts = extractUsedContexts(); 0293 0294 QMap<const Definition *, const Definition *> maxVersionByDefinitions; 0295 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules; 0296 0297 QMapIterator<QString, Definition> def(m_definitions); 0298 while (def.hasNext()) { 0299 def.next(); 0300 const auto &definition = def.value(); 0301 const auto &filename = definition.filename; 0302 0303 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions); 0304 if (maxDef != &definition) { 0305 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr 0306 << ". Please, increase kateversion."; 0307 success = false; 0308 } 0309 0310 QSet<ItemDatas::Style> usedAttributeNames; 0311 QSet<ItemDatas::Style> ignoredAttributeNames; 0312 success = checkKeywordsList(definition) && success; 0313 success = checkContexts(definition, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success; 0314 0315 // search for non-existing itemDatas. 0316 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames; 0317 for (const auto &styleName : invalidNames) { 0318 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name; 0319 success = false; 0320 } 0321 0322 // search for existing itemDatas, but unusable. 0323 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames; 0324 for (const auto &styleName : ignoredNames) { 0325 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name 0326 << "is never used. All uses are with lookAhead=true or <IncludeRules/>"; 0327 success = false; 0328 } 0329 0330 // search for unused itemDatas. 0331 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames; 0332 unusedNames -= ignoredNames; 0333 for (const auto &styleName : std::as_const(unusedNames)) { 0334 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name; 0335 success = false; 0336 } 0337 } 0338 0339 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules); 0340 while (unreachableIncludedRuleIt.hasNext()) { 0341 unreachableIncludedRuleIt.next(); 0342 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value(); 0343 if (unreachableRulesBy.alwaysUnreachable) { 0344 auto *rule = unreachableIncludedRuleIt.key(); 0345 0346 if (!rule->parentContext->isOnlyIncluded) { 0347 continue; 0348 } 0349 0350 // remove duplicates rules 0351 QSet<const Context::Rule *> rules; 0352 auto &unreachableBy = unreachableRulesBy.unreachableBy; 0353 unreachableBy.erase(std::remove_if(unreachableBy.begin(), 0354 unreachableBy.end(), 0355 [&](const RuleAndInclude &ruleAndInclude) { 0356 if (rules.contains(ruleAndInclude.rule)) { 0357 return true; 0358 } 0359 rules.insert(ruleAndInclude.rule); 0360 return false; 0361 }), 0362 unreachableBy.end()); 0363 0364 QString message; 0365 message.reserve(128); 0366 for (auto &ruleAndInclude : std::as_const(unreachableBy)) { 0367 message += QStringLiteral("line "); 0368 message += QString::number(ruleAndInclude.rule->line); 0369 message += QStringLiteral(" ["); 0370 message += ruleAndInclude.rule->parentContext->name; 0371 if (rule->filename != ruleAndInclude.rule->filename) { 0372 message += QStringLiteral(" ("); 0373 message += ruleAndInclude.rule->filename; 0374 message += QLatin1Char(')'); 0375 } 0376 if (ruleAndInclude.includeRules) { 0377 message += QStringLiteral(" via line "); 0378 message += QString::number(ruleAndInclude.includeRules->line); 0379 } 0380 message += QStringLiteral("], "); 0381 } 0382 message.chop(2); 0383 0384 qWarning() << rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message; 0385 success = false; 0386 } 0387 } 0388 0389 return success; 0390 } 0391 0392 private: 0393 enum class XmlBool { 0394 Unspecified, 0395 False, 0396 True, 0397 }; 0398 0399 struct Context; 0400 0401 struct ContextName { 0402 QString name; 0403 int popCount = 0; 0404 bool stay = false; 0405 0406 Context *context = nullptr; 0407 }; 0408 0409 struct Parser { 0410 const QString &filename; 0411 QXmlStreamReader &xml; 0412 QXmlStreamAttribute &attr; 0413 bool success; 0414 0415 //! Read a string type attribute, \c success = \c false when \p str is not empty 0416 //! \return \c true when attr.name() == attrName, otherwise false 0417 bool extractString(QString &str, const QString &attrName) 0418 { 0419 if (attr.name() != attrName) { 0420 return false; 0421 } 0422 0423 str = attr.value().toString(); 0424 if (str.isEmpty()) { 0425 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty"; 0426 success = false; 0427 } 0428 0429 return true; 0430 } 0431 0432 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified. 0433 //! \return \c true when attr.name() == attrName, otherwise false 0434 bool extractXmlBool(XmlBool &xmlBool, const QString &attrName) 0435 { 0436 if (attr.name() != attrName) { 0437 return false; 0438 } 0439 0440 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False; 0441 0442 return true; 0443 } 0444 0445 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0 0446 //! \return \c true when attr.name() == attrName, otherwise false 0447 bool extractPositive(int &positive, const QString &attrName) 0448 { 0449 if (attr.name() != attrName) { 0450 return false; 0451 } 0452 0453 bool ok = true; 0454 positive = attr.value().toInt(&ok); 0455 0456 if (!ok || positive < 0) { 0457 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value(); 0458 success = false; 0459 } 0460 0461 return true; 0462 } 0463 0464 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0 0465 //! \return \c true when attr.name() == attrName, otherwise false 0466 bool checkColor(const QString &attrName) 0467 { 0468 if (attr.name() != attrName) { 0469 return false; 0470 } 0471 0472 const auto value = attr.value(); 0473 if (value.isEmpty() /*|| QColor(value).isValid()*/) { 0474 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value; 0475 success = false; 0476 } 0477 0478 return true; 0479 } 0480 0481 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char 0482 //! \return \c true when attr.name() == attrName, otherwise false 0483 bool extractChar(QChar &c, const QString &attrName) 0484 { 0485 if (attr.name() != attrName) { 0486 return false; 0487 } 0488 0489 if (attr.value().size() == 1) { 0490 c = attr.value()[0]; 0491 } else { 0492 c = QLatin1Char('_'); 0493 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value(); 0494 success = false; 0495 } 0496 0497 return true; 0498 } 0499 0500 //! \return parsing status when \p isExtracted is \c true, otherwise \c false 0501 bool checkIfExtracted(bool isExtracted) 0502 { 0503 if (isExtracted) { 0504 return success; 0505 } 0506 0507 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name(); 0508 return false; 0509 } 0510 }; 0511 0512 struct Keywords { 0513 struct Items { 0514 struct Item { 0515 QString content; 0516 int line; 0517 0518 friend size_t qHash(const Item &item, size_t seed = 0) 0519 { 0520 return qHash(item.content, seed); 0521 } 0522 0523 friend bool operator==(const Item &item0, const Item &item1) 0524 { 0525 return item0.content == item1.content; 0526 } 0527 }; 0528 0529 QList<Item> keywords; 0530 QSet<Item> includes; 0531 0532 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0533 { 0534 bool success = true; 0535 0536 const int line = xml.lineNumber(); 0537 QString content = xml.readElementText(); 0538 0539 if (content.isEmpty()) { 0540 qWarning() << filename << "line" << line << "is empty:" << xml.name(); 0541 success = false; 0542 } 0543 0544 if (xml.name() == QStringLiteral("include")) { 0545 includes.insert({content, line}); 0546 } else if (xml.name() == QStringLiteral("item")) { 0547 keywords.append({content, line}); 0548 } else { 0549 qWarning() << filename << "line" << line << "invalid element:" << xml.name(); 0550 success = false; 0551 } 0552 0553 return success; 0554 } 0555 }; 0556 0557 QString name; 0558 Items items; 0559 int line; 0560 0561 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0562 { 0563 line = xml.lineNumber(); 0564 0565 bool success = true; 0566 for (auto &attr : xml.attributes()) { 0567 Parser parser{filename, xml, attr, success}; 0568 0569 const bool isExtracted = parser.extractString(name, QStringLiteral("name")); 0570 0571 success = parser.checkIfExtracted(isExtracted); 0572 } 0573 return success; 0574 } 0575 }; 0576 0577 struct Context { 0578 struct Rule { 0579 enum class Type { 0580 Unknown, 0581 AnyChar, 0582 Detect2Chars, 0583 DetectChar, 0584 DetectIdentifier, 0585 DetectSpaces, 0586 Float, 0587 HlCChar, 0588 HlCHex, 0589 HlCOct, 0590 HlCStringChar, 0591 IncludeRules, 0592 Int, 0593 LineContinue, 0594 RangeDetect, 0595 RegExpr, 0596 StringDetect, 0597 WordDetect, 0598 keyword, 0599 }; 0600 0601 Type type{}; 0602 0603 bool isDotRegex = false; 0604 int line = -1; 0605 0606 // commonAttributes 0607 QString attribute; 0608 ContextName context; 0609 QString beginRegion; 0610 QString endRegion; 0611 int column = -1; 0612 XmlBool lookAhead{}; 0613 XmlBool firstNonSpace{}; 0614 0615 // StringDetect, WordDetect, keyword 0616 XmlBool insensitive{}; 0617 0618 // DetectChar, StringDetect, RegExpr, keyword 0619 XmlBool dynamic{}; 0620 0621 // Regex 0622 XmlBool minimal{}; 0623 0624 // IncludeRule 0625 XmlBool includeAttrib{}; 0626 0627 // DetectChar, Detect2Chars, LineContinue, RangeDetect 0628 QChar char0; 0629 // Detect2Chars, RangeDetect 0630 QChar char1; 0631 0632 // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword 0633 QString string; 0634 // RegExpr without .* as suffix 0635 QString sanitizedString; 0636 0637 // Float, HlCHex, HlCOct, Int, WordDetect, keyword 0638 QString additionalDeliminator; 0639 QString weakDeliminator; 0640 0641 // rules included by IncludeRules (without IncludeRule) 0642 QList<const Rule *> includedRules; 0643 0644 // IncludeRules included by IncludeRules 0645 QSet<const Rule *> includedIncludeRules; 0646 0647 Context const *parentContext = nullptr; 0648 0649 QString filename; 0650 0651 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0652 { 0653 this->filename = filename; 0654 line = xml.lineNumber(); 0655 0656 using Pair = QPair<QString, Type>; 0657 static const auto pairs = { 0658 Pair{QStringLiteral("AnyChar"), Type::AnyChar}, 0659 Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars}, 0660 Pair{QStringLiteral("DetectChar"), Type::DetectChar}, 0661 Pair{QStringLiteral("DetectIdentifier"), Type::DetectIdentifier}, 0662 Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces}, 0663 Pair{QStringLiteral("Float"), Type::Float}, 0664 Pair{QStringLiteral("HlCChar"), Type::HlCChar}, 0665 Pair{QStringLiteral("HlCHex"), Type::HlCHex}, 0666 Pair{QStringLiteral("HlCOct"), Type::HlCOct}, 0667 Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar}, 0668 Pair{QStringLiteral("IncludeRules"), Type::IncludeRules}, 0669 Pair{QStringLiteral("Int"), Type::Int}, 0670 Pair{QStringLiteral("LineContinue"), Type::LineContinue}, 0671 Pair{QStringLiteral("RangeDetect"), Type::RangeDetect}, 0672 Pair{QStringLiteral("RegExpr"), Type::RegExpr}, 0673 Pair{QStringLiteral("StringDetect"), Type::StringDetect}, 0674 Pair{QStringLiteral("WordDetect"), Type::WordDetect}, 0675 Pair{QStringLiteral("keyword"), Type::keyword}, 0676 }; 0677 0678 for (auto pair : pairs) { 0679 if (xml.name() == pair.first) { 0680 type = pair.second; 0681 bool success = parseAttributes(filename, xml); 0682 success = checkMandoryAttributes(filename, xml) && success; 0683 if (success && type == Type::RegExpr) { 0684 // ., (.) followed by *, +, {1} or nothing 0685 static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)")); 0686 // remove "(?:" and ")" 0687 static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))")); 0688 // remove parentheses on a copy of string 0689 auto reg = QString(string).replace(removeParentheses, QString()); 0690 isDotRegex = reg.contains(isDot); 0691 0692 // Remove .* and .*$ suffix. 0693 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$")); 0694 sanitizedString = string; 0695 sanitizedString.replace(allSuffix, QString()); 0696 // string is a catch-all, do not sanitize 0697 if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) { 0698 sanitizedString = string; 0699 } 0700 } 0701 return success; 0702 } 0703 } 0704 0705 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name(); 0706 return false; 0707 } 0708 0709 private: 0710 bool parseAttributes(const QString &filename, QXmlStreamReader &xml) 0711 { 0712 bool success = true; 0713 0714 for (auto &attr : xml.attributes()) { 0715 Parser parser{filename, xml, attr, success}; 0716 0717 // clang-format off 0718 const bool isExtracted 0719 = parser.extractString(attribute, QStringLiteral("attribute")) 0720 || parser.extractString(context.name, QStringLiteral("context")) 0721 || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead")) 0722 || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace")) 0723 || parser.extractString(beginRegion, QStringLiteral("beginRegion")) 0724 || parser.extractString(endRegion, QStringLiteral("endRegion")) 0725 || parser.extractPositive(column, QStringLiteral("column")) 0726 || ((type == Type::RegExpr 0727 || type == Type::StringDetect 0728 || type == Type::WordDetect 0729 || type == Type::keyword 0730 ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive"))) 0731 || ((type == Type::DetectChar 0732 || type == Type::RegExpr 0733 || type == Type::StringDetect 0734 || type == Type::keyword 0735 ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic"))) 0736 || ((type == Type::RegExpr) 0737 && parser.extractXmlBool(minimal, QStringLiteral("minimal"))) 0738 || ((type == Type::DetectChar 0739 || type == Type::Detect2Chars 0740 || type == Type::LineContinue 0741 || type == Type::RangeDetect 0742 ) && parser.extractChar(char0, QStringLiteral("char"))) 0743 || ((type == Type::Detect2Chars 0744 || type == Type::RangeDetect 0745 ) && parser.extractChar(char1, QStringLiteral("char1"))) 0746 || ((type == Type::AnyChar 0747 || type == Type::RegExpr 0748 || type == Type::StringDetect 0749 || type == Type::WordDetect 0750 || type == Type::keyword 0751 ) && parser.extractString(string, QStringLiteral("String"))) 0752 || ((type == Type::IncludeRules) 0753 && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib"))) 0754 || ((type == Type::Float 0755 || type == Type::HlCHex 0756 || type == Type::HlCOct 0757 || type == Type::Int 0758 || type == Type::keyword 0759 || type == Type::WordDetect 0760 ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator")) 0761 || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator")))) 0762 ; 0763 // clang-format on 0764 0765 success = parser.checkIfExtracted(isExtracted); 0766 0767 if (type == Type::LineContinue && char0 == QLatin1Char('\0')) { 0768 char0 = QLatin1Char('\\'); 0769 } 0770 } 0771 0772 return success; 0773 } 0774 0775 bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml) 0776 { 0777 QString missingAttr; 0778 0779 switch (type) { 0780 case Type::Unknown: 0781 return false; 0782 0783 case Type::AnyChar: 0784 case Type::RegExpr: 0785 case Type::StringDetect: 0786 case Type::WordDetect: 0787 case Type::keyword: 0788 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString(); 0789 break; 0790 0791 case Type::DetectChar: 0792 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString(); 0793 break; 0794 0795 case Type::Detect2Chars: 0796 case Type::RangeDetect: 0797 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1") 0798 : !char0.unicode() ? QStringLiteral("char") 0799 : !char1.unicode() ? QStringLiteral("char1") 0800 : QString(); 0801 break; 0802 0803 case Type::IncludeRules: 0804 missingAttr = context.name.isEmpty() ? QStringLiteral("context") : QString(); 0805 break; 0806 0807 case Type::DetectIdentifier: 0808 case Type::DetectSpaces: 0809 case Type::Float: 0810 case Type::HlCChar: 0811 case Type::HlCHex: 0812 case Type::HlCOct: 0813 case Type::HlCStringChar: 0814 case Type::Int: 0815 case Type::LineContinue: 0816 break; 0817 } 0818 0819 if (!missingAttr.isEmpty()) { 0820 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr; 0821 return false; 0822 } 0823 0824 return true; 0825 } 0826 }; 0827 0828 int line; 0829 // becomes false when a context (except includeRule) refers to it 0830 bool isOnlyIncluded = true; 0831 // becomes true when an includedRule refers to it with includeAttrib=true 0832 bool referencedWithIncludeAttrib = false; 0833 bool hasDynamicRule = false; 0834 QString name; 0835 QString attribute; 0836 ContextName lineEndContext; 0837 ContextName lineEmptyContext; 0838 ContextName fallthroughContext; 0839 QList<Rule> rules; 0840 XmlBool dynamic{}; 0841 XmlBool fallthrough{}; 0842 XmlBool stopEmptyLineContextSwitchLoop{}; 0843 0844 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0845 { 0846 line = xml.lineNumber(); 0847 0848 bool success = true; 0849 0850 for (auto &attr : xml.attributes()) { 0851 Parser parser{filename, xml, attr, success}; 0852 XmlBool noIndentationBasedFolding{}; 0853 0854 // clang-format off 0855 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) 0856 || parser.extractString(attribute, QStringLiteral("attribute")) 0857 || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext")) 0858 || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext")) 0859 || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext")) 0860 || parser.extractXmlBool(dynamic, QStringLiteral("dynamic")) 0861 || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough")) 0862 || parser.extractXmlBool(stopEmptyLineContextSwitchLoop, QStringLiteral("stopEmptyLineContextSwitchLoop")) 0863 || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding")); 0864 // clang-format on 0865 0866 success = parser.checkIfExtracted(isExtracted); 0867 } 0868 0869 if (name.isEmpty()) { 0870 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name"; 0871 success = false; 0872 } 0873 0874 if (attribute.isEmpty()) { 0875 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute"; 0876 success = false; 0877 } 0878 0879 return success; 0880 } 0881 }; 0882 0883 struct Version { 0884 int majorRevision; 0885 int minorRevision; 0886 0887 Version(int majorRevision = 0, int minorRevision = 0) 0888 : majorRevision(majorRevision) 0889 , minorRevision(minorRevision) 0890 { 0891 } 0892 0893 bool operator<(const Version &version) const 0894 { 0895 return majorRevision < version.majorRevision || (majorRevision == version.majorRevision && minorRevision < version.minorRevision); 0896 } 0897 }; 0898 0899 struct ItemDatas { 0900 struct Style { 0901 QString name; 0902 int line; 0903 0904 friend size_t qHash(const Style &style, size_t seed = 0) 0905 { 0906 return qHash(style.name, seed); 0907 } 0908 0909 friend bool operator==(const Style &style0, const Style &style1) 0910 { 0911 return style0.name == style1.name; 0912 } 0913 }; 0914 0915 QSet<Style> styleNames; 0916 0917 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0918 { 0919 bool success = true; 0920 0921 QString name; 0922 QString defStyleNum; 0923 XmlBool boolean; 0924 0925 for (auto &attr : xml.attributes()) { 0926 Parser parser{filename, xml, attr, success}; 0927 0928 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum")) 0929 || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic")) 0930 || parser.extractXmlBool(boolean, QStringLiteral("underline")) || parser.extractXmlBool(boolean, QStringLiteral("strikeOut")) 0931 || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color")) 0932 || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor")) 0933 || parser.checkColor(QStringLiteral("selBackgroundColor")); 0934 0935 success = parser.checkIfExtracted(isExtracted); 0936 } 0937 0938 if (!name.isEmpty()) { 0939 const auto len = styleNames.size(); 0940 styleNames.insert({name, int(xml.lineNumber())}); 0941 if (len == styleNames.size()) { 0942 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name; 0943 success = false; 0944 } 0945 } 0946 0947 return success; 0948 } 0949 }; 0950 0951 struct Definition { 0952 QMap<QString, Keywords> keywordsList; 0953 QMap<QString, Context> contexts; 0954 ItemDatas itemDatas; 0955 QString firstContextName; 0956 const Context *firstContext = nullptr; 0957 QString filename; 0958 WordDelimiters wordDelimiters; 0959 Version kateVersion{}; 0960 QString kateVersionStr; 0961 QString languageName; 0962 QSet<const Definition *> referencedDefinitions; 0963 0964 // Parse <keywords ...> 0965 bool parseKeywords(QXmlStreamReader &xml) 0966 { 0967 wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator"))); 0968 wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator"))); 0969 return true; 0970 } 0971 }; 0972 0973 // Parse <context> 0974 void processContextElement(QXmlStreamReader &xml) 0975 { 0976 Context context; 0977 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success; 0978 if (m_currentDefinition->firstContextName.isEmpty()) { 0979 m_currentDefinition->firstContextName = context.name; 0980 } 0981 if (m_currentDefinition->contexts.contains(context.name)) { 0982 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << context.name; 0983 m_success = false; 0984 } 0985 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context); 0986 } 0987 0988 // Parse <list name="..."> 0989 void processListElement(QXmlStreamReader &xml) 0990 { 0991 Keywords keywords; 0992 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success; 0993 if (m_currentDefinition->keywordsList.contains(keywords.name)) { 0994 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name; 0995 m_success = false; 0996 } 0997 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords); 0998 } 0999 1000 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const 1001 { 1002 auto it = maxVersionByDefinitions.find(&definition); 1003 if (it != maxVersionByDefinitions.end()) { 1004 return it.value(); 1005 } else { 1006 auto it = maxVersionByDefinitions.insert(&definition, &definition); 1007 for (const auto &referencedDef : definition.referencedDefinitions) { 1008 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions); 1009 if (it.value()->kateVersion < maxDef->kateVersion) { 1010 it.value() = maxDef; 1011 } 1012 } 1013 return it.value(); 1014 } 1015 } 1016 1017 // Initialize the referenced rules (Rule::includedRules) 1018 void resolveIncludeRules() 1019 { 1020 QSet<const Context *> usedContexts; 1021 QList<const Context *> contexts; 1022 1023 QMutableMapIterator<QString, Definition> def(m_definitions); 1024 while (def.hasNext()) { 1025 def.next(); 1026 auto &definition = def.value(); 1027 QMutableMapIterator<QString, Context> contextIt(definition.contexts); 1028 while (contextIt.hasNext()) { 1029 contextIt.next(); 1030 auto ¤tContext = contextIt.value(); 1031 for (auto &rule : currentContext.rules) { 1032 if (rule.type != Context::Rule::Type::IncludeRules) { 1033 continue; 1034 } 1035 1036 if (rule.context.stay) { 1037 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself"; 1038 m_success = false; 1039 continue; 1040 } 1041 1042 if (rule.context.popCount) { 1043 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix"; 1044 m_success = false; 1045 } 1046 1047 if (!rule.context.context) { 1048 m_success = false; 1049 continue; 1050 } 1051 1052 // resolve includedRules and includedIncludeRules 1053 1054 usedContexts.clear(); 1055 usedContexts.insert(rule.context.context); 1056 contexts.clear(); 1057 contexts.append(rule.context.context); 1058 1059 for (int i = 0; i < contexts.size(); ++i) { 1060 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule; 1061 for (const auto &includedRule : contexts[i]->rules) { 1062 if (includedRule.type != Context::Rule::Type::IncludeRules) { 1063 rule.includedRules.append(&includedRule); 1064 } else if (&rule == &includedRule) { 1065 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity"; 1066 m_success = false; 1067 } else { 1068 rule.includedIncludeRules.insert(&includedRule); 1069 1070 if (includedRule.includedRules.isEmpty()) { 1071 const auto *context = includedRule.context.context; 1072 if (context && !usedContexts.contains(context)) { 1073 contexts.append(context); 1074 usedContexts.insert(context); 1075 } 1076 } else { 1077 rule.includedRules.append(includedRule.includedRules); 1078 } 1079 } 1080 } 1081 } 1082 } 1083 } 1084 } 1085 } 1086 1087 //! Recursively extracts the contexts used from the first context of the definitions. 1088 //! This method detects groups of contexts which are only used among themselves. 1089 QSet<const Context *> extractUsedContexts() const 1090 { 1091 QSet<const Context *> usedContexts; 1092 QList<const Context *> contexts; 1093 1094 QMapIterator<QString, Definition> def(m_definitions); 1095 while (def.hasNext()) { 1096 def.next(); 1097 const auto &definition = def.value(); 1098 1099 if (definition.firstContext) { 1100 usedContexts.insert(definition.firstContext); 1101 contexts.clear(); 1102 contexts.append(definition.firstContext); 1103 1104 for (int i = 0; i < contexts.size(); ++i) { 1105 auto appendContext = [&](const Context *context) { 1106 if (context && !usedContexts.contains(context)) { 1107 contexts.append(context); 1108 usedContexts.insert(context); 1109 } 1110 }; 1111 1112 const auto *context = contexts[i]; 1113 appendContext(context->lineEndContext.context); 1114 appendContext(context->lineEmptyContext.context); 1115 appendContext(context->fallthroughContext.context); 1116 1117 for (auto &rule : context->rules) { 1118 appendContext(rule.context.context); 1119 } 1120 } 1121 } 1122 } 1123 1124 return usedContexts; 1125 } 1126 1127 struct RuleAndInclude { 1128 const Context::Rule *rule; 1129 const Context::Rule *includeRules; 1130 1131 explicit operator bool() const 1132 { 1133 return rule; 1134 } 1135 }; 1136 1137 struct IncludedRuleUnreachableBy { 1138 QList<RuleAndInclude> unreachableBy; 1139 bool alwaysUnreachable = true; 1140 }; 1141 1142 //! Check contexts and rules 1143 bool checkContexts(const Definition &definition, 1144 QSet<ItemDatas::Style> &usedAttributeNames, 1145 QSet<ItemDatas::Style> &ignoredAttributeNames, 1146 const QSet<const Context *> &usedContexts, 1147 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const 1148 { 1149 bool success = true; 1150 1151 QMapIterator<QString, Context> contextIt(definition.contexts); 1152 while (contextIt.hasNext()) { 1153 contextIt.next(); 1154 1155 const auto &context = contextIt.value(); 1156 const auto &filename = definition.filename; 1157 1158 if (!usedContexts.contains(&context)) { 1159 qWarning() << filename << "line" << context.line << "unused context:" << context.name; 1160 success = false; 1161 continue; 1162 } 1163 1164 if (context.name.startsWith(QStringLiteral("#pop"))) { 1165 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name; 1166 success = false; 1167 } 1168 1169 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) { 1170 usedAttributeNames.insert({context.attribute, context.line}); 1171 } 1172 1173 success = checkContextAttribute(definition, context) && success; 1174 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success; 1175 success = suggestRuleMerger(definition.filename, context) && success; 1176 1177 for (const auto &rule : context.rules) { 1178 if (!rule.attribute.isEmpty()) { 1179 if (rule.lookAhead != XmlBool::True) { 1180 usedAttributeNames.insert({rule.attribute, rule.line}); 1181 } else { 1182 ignoredAttributeNames.insert({rule.attribute, rule.line}); 1183 } 1184 } 1185 success = checkLookAhead(rule) && success; 1186 success = checkStringDetect(rule) && success; 1187 success = checkKeyword(definition, rule) && success; 1188 success = checkRegExpr(filename, rule, context) && success; 1189 success = checkDelimiters(definition, rule) && success; 1190 } 1191 } 1192 1193 return success; 1194 } 1195 1196 //! Check that a regular expression in a RegExpr rule: 1197 //! - isValid() 1198 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. 1199 //! - dynamic=true but no place holder used? 1200 //! - is not . with lookAhead="1" 1201 //! - is not ^... without column ou firstNonSpace attribute 1202 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect, LineContinue or AnyChar 1203 //! - has no unused captures 1204 //! - has no unnecessary quantifier with lookAhead 1205 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const 1206 { 1207 // ignore empty regex because the error is raised during xml parsing 1208 if (rule.type == Context::Rule::Type::RegExpr && !rule.string.isEmpty()) { 1209 const QRegularExpression regexp(rule.string); 1210 if (!checkRegularExpression(rule.filename, regexp, rule.line)) { 1211 return false; 1212 } 1213 1214 // dynamic == true and no place holder? 1215 if (rule.dynamic == XmlBool::True) { 1216 static const QRegularExpression placeHolder(QStringLiteral("%\\d+")); 1217 if (!rule.string.contains(placeHolder)) { 1218 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder"; 1219 return false; 1220 } 1221 } 1222 1223 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string; 1224 if (rule.lookAhead == XmlBool::True) { 1225 static const QRegularExpression removeAllSuffix(QStringLiteral( 1226 R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)")); 1227 reg.replace(removeAllSuffix, QString()); 1228 } 1229 1230 reg.replace(QStringLiteral("{1}"), QString()); 1231 1232 // is DetectSpaces 1233 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) followed by *, + 1234 static const QRegularExpression isDetectSpaces( 1235 QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)")); 1236 if (rule.string.contains(isDetectSpaces)) { 1237 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : ""; 1238 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":" 1239 << rule.string; 1240 return false; 1241 } 1242 1243 #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))" 1244 #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])" 1245 1246 // is RangeDetect 1247 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:" 1248 "\\.\\*[?+]?" REG_CHAR "|" 1249 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?+]?\\1" 1250 ")$")); 1251 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?")) 1252 || rule.string.contains(QStringLiteral("[^"))) 1253 && reg.contains(isRange)) { 1254 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string; 1255 return false; 1256 } 1257 1258 // is AnyChar 1259 static const QRegularExpression isAnyChar(QStringLiteral(R"(^(\^|\((\?:)?)*\[(?!\^)[-\]]?(\\[^0BDPSWbdpswoux]|[^-\]\\])*\]\)*$)")); 1260 if (rule.string.contains(isAnyChar)) { 1261 auto extra = (reg[0] == QLatin1Char('^') || reg[1] == QLatin1Char('^')) ? "with column=\"0\"" : ""; 1262 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by AnyChar:" << rule.string << extra; 1263 return false; 1264 } 1265 1266 // is LineContinue 1267 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$")); 1268 if (reg.contains(isLineContinue)) { 1269 auto extra = (reg[0] == QLatin1Char('^')) ? "with column=\"0\"" : ""; 1270 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra; 1271 return false; 1272 } 1273 1274 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _ 1275 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR)); 1276 reg.replace(sanitize1, QStringLiteral("_")); 1277 1278 #undef REG_CHAR 1279 #undef REG_ESCAPE_CHAR 1280 1281 // use minimal or lazy operator 1282 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]")); 1283 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]")); 1284 1285 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy) 1286 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0) 1287 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) { 1288 qWarning() << rule.filename << "line" << rule.line 1289 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string; 1290 return false; 1291 } 1292 1293 // replace [:...:] with ___ 1294 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])")); 1295 reg.replace(sanitize2, QStringLiteral("___")); 1296 1297 // replace [ccc...], [special] with ... 1298 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))")); 1299 reg.replace(sanitize3, QStringLiteral("...\\1")); 1300 1301 // replace [c] with _ 1302 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])")); 1303 reg.replace(sanitize4, QStringLiteral("_")); 1304 1305 const int len = reg.size(); 1306 // replace [cC] with _ 1307 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])")); 1308 reg = reg.toUpper(); 1309 reg.replace(toInsensitive, QString()); 1310 1311 // is StringDetect 1312 // ignore (?:, ) and {n} 1313 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)")); 1314 if (reg.contains(isStringDetect)) { 1315 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : ""; 1316 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg 1317 << ":" << rule.string; 1318 if (len != reg.size()) { 1319 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string; 1320 } 1321 return false; 1322 } 1323 1324 // column="0" 1325 if (rule.column == -1) { 1326 // ^ without | 1327 // (^sas*) -> ok 1328 // (^sa|s*) -> ko 1329 // (^(sa|s*)) -> ok 1330 auto first = std::as_const(reg).begin(); 1331 auto last = std::as_const(reg).end(); 1332 int depth = 0; 1333 1334 while (QLatin1Char('(') == *first) { 1335 ++depth; 1336 ++first; 1337 if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) { 1338 first += 2; 1339 } 1340 } 1341 1342 if (QLatin1Char('^') == *first) { 1343 const int bolDepth = depth; 1344 bool replace = true; 1345 1346 while (++first != last) { 1347 if (QLatin1Char('(') == *first) { 1348 ++depth; 1349 } else if (QLatin1Char(')') == *first) { 1350 --depth; 1351 if (depth < bolDepth) { 1352 // (^a)? === (^a|) -> ko 1353 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) { 1354 replace = false; 1355 break; 1356 } 1357 } 1358 } else if (QLatin1Char('|') == *first) { 1359 // ignore '|' within subgroup 1360 if (depth <= bolDepth) { 1361 replace = false; 1362 break; 1363 } 1364 } 1365 } 1366 1367 if (replace) { 1368 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" missing with RegExpr:" << rule.string; 1369 return false; 1370 } 1371 } 1372 } 1373 1374 // add ^ with column=0 1375 if (rule.column == 0 && !rule.isDotRegex) { 1376 bool hasStartOfLine = false; 1377 auto first = std::as_const(reg).begin(); 1378 auto last = std::as_const(reg).end(); 1379 for (; first != last; ++first) { 1380 if (*first == QLatin1Char('^')) { 1381 hasStartOfLine = true; 1382 break; 1383 } else if (*first == QLatin1Char('(')) { 1384 if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) { 1385 first += 2; 1386 } 1387 } else { 1388 break; 1389 } 1390 } 1391 1392 if (!hasStartOfLine) { 1393 qWarning() << rule.filename << "line" << rule.line 1394 << "start of line missing in the pattern with column=\"0\" (i.e. abc -> ^abc):" << rule.string; 1395 return false; 1396 } 1397 } 1398 1399 bool useCapture = false; 1400 1401 // detection of unnecessary capture 1402 if (regexp.captureCount()) { 1403 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) { 1404 int maxCapture = 9; 1405 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) { 1406 --maxCapture; 1407 } 1408 return maxCapture; 1409 }; 1410 1411 int maxCaptureUsed = 0; 1412 // maximal dynamic reference 1413 if (rule.context.context && !rule.context.stay) { 1414 for (const auto &nextRule : rule.context.context->rules) { 1415 if (nextRule.dynamic == XmlBool::True) { 1416 static const QString cap[]{ 1417 QStringLiteral("%1"), 1418 QStringLiteral("%2"), 1419 QStringLiteral("%3"), 1420 QStringLiteral("%4"), 1421 QStringLiteral("%5"), 1422 QStringLiteral("%6"), 1423 QStringLiteral("%7"), 1424 QStringLiteral("%8"), 1425 QStringLiteral("%9"), 1426 }; 1427 int maxDynamicCapture = maximalCapture(cap, nextRule.string); 1428 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture); 1429 } 1430 } 1431 } 1432 1433 static const QString num1[]{ 1434 QStringLiteral("\\1"), 1435 QStringLiteral("\\2"), 1436 QStringLiteral("\\3"), 1437 QStringLiteral("\\4"), 1438 QStringLiteral("\\5"), 1439 QStringLiteral("\\6"), 1440 QStringLiteral("\\7"), 1441 QStringLiteral("\\8"), 1442 QStringLiteral("\\9"), 1443 }; 1444 static const QString num2[]{ 1445 QStringLiteral("\\g1"), 1446 QStringLiteral("\\g2"), 1447 QStringLiteral("\\g3"), 1448 QStringLiteral("\\g4"), 1449 QStringLiteral("\\g5"), 1450 QStringLiteral("\\g6"), 1451 QStringLiteral("\\g7"), 1452 QStringLiteral("\\g8"), 1453 QStringLiteral("\\g9"), 1454 }; 1455 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string)); 1456 1457 const int maxCapture = std::max(maxCaptureUsed, maxBackReference); 1458 1459 if (maxCapture && regexp.captureCount() > maxCapture) { 1460 qWarning() << rule.filename << "line" << rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture 1461 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string; 1462 return false; 1463 } 1464 1465 useCapture = maxCapture; 1466 } 1467 1468 if (!useCapture) { 1469 // is DetectIdentifier 1470 static const QRegularExpression isDetectIdentifier( 1471 QStringLiteral(R"(^(\((\?:)?|\^)*\[(\\p\{L\}|_){2}\]([+][?+]?)?\[(\\p\{N\}|\\p\{L\}|_){3}\][*][?+]?\)*$)")); 1472 if (rule.string.contains(isDetectIdentifier)) { 1473 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string; 1474 return false; 1475 } 1476 } 1477 1478 if (rule.isDotRegex) { 1479 // search next rule with same column or firstNonSpace 1480 int i = &rule - context.rules.data() + 1; 1481 const bool hasColumn = (rule.column != -1); 1482 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True); 1483 const bool isSpecial = (hasColumn || hasFirstNonSpace); 1484 for (; i < context.rules.size(); ++i) { 1485 auto &rule2 = context.rules[i]; 1486 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) { 1487 i = context.rules.size(); 1488 break; 1489 } 1490 1491 const bool hasColumn2 = (rule2.column != -1); 1492 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True); 1493 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column) 1494 || (hasFirstNonSpace && hasFirstNonSpace2)) { 1495 break; 1496 } 1497 } 1498 1499 auto ruleFilename = (filename == rule.filename) ? QString() : QStringLiteral("in ") + rule.filename; 1500 if (i == context.rules.size()) { 1501 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty() 1502 && rule.endRegion.isEmpty() && !useCapture) { 1503 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename 1504 << "should be replaced by fallthroughContext:" << rule.string; 1505 } 1506 } else { 1507 auto &nextRule = context.rules[i]; 1508 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename; 1509 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename 1510 << "because a dot RegExpr is used line" << rule.line << ruleFilename; 1511 } 1512 1513 // unnecessary quantifier 1514 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)")); 1515 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)")); 1516 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2; 1517 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) { 1518 qWarning() << rule.filename << "line" << rule.line 1519 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' -> 'xyz.'):" << rule.string; 1520 return false; 1521 } 1522 } 1523 } 1524 1525 return true; 1526 } 1527 1528 // Parse and check <emptyLine> 1529 bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml) 1530 { 1531 bool success = true; 1532 1533 QString pattern; 1534 XmlBool casesensitive{}; 1535 1536 for (auto &attr : xml.attributes()) { 1537 Parser parser{filename, xml, attr, success}; 1538 1539 const bool isExtracted = 1540 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive")); 1541 1542 success = parser.checkIfExtracted(isExtracted); 1543 } 1544 1545 if (pattern.isEmpty()) { 1546 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr"; 1547 success = false; 1548 } else { 1549 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber()); 1550 } 1551 1552 return success; 1553 } 1554 1555 //! Check that a regular expression: 1556 //! - isValid() 1557 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. 1558 bool checkRegularExpression(const QString &filename, const QRegularExpression ®exp, int line) const 1559 { 1560 const auto pattern = regexp.pattern(); 1561 1562 // validate regexp 1563 if (!regexp.isValid()) { 1564 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset" 1565 << regexp.patternErrorOffset(); 1566 return false; 1567 } 1568 1569 // catch possible case typos: [A-z] or [a-Z] 1570 const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z"))); 1571 if (azOffset >= 0) { 1572 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset; 1573 return false; 1574 } 1575 1576 return true; 1577 } 1578 1579 //! Check fallthrough and fallthroughContext. 1580 //! Check kateversion for stopEmptyLineContextSwitchLoop. 1581 bool checkContextAttribute(const Definition &definition, const Context &context) const 1582 { 1583 bool success = true; 1584 1585 if (!context.fallthroughContext.name.isEmpty()) { 1586 const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62}; 1587 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) { 1588 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context" 1589 << context.name; 1590 success = false; 1591 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) { 1592 qWarning() << definition.filename << "line" << context.line 1593 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context" 1594 << context.name; 1595 success = false; 1596 } 1597 } 1598 1599 if (context.stopEmptyLineContextSwitchLoop != XmlBool::Unspecified && definition.kateVersion < Version{5, 103}) { 1600 qWarning() << definition.filename << "line" << context.line 1601 << "stopEmptyLineContextSwitchLoop attribute is only valid with kateversion >= 5.103 in context" << context.name; 1602 success = false; 1603 } 1604 1605 return success; 1606 } 1607 1608 //! Search for additionalDeliminator/weakDeliminator which has no effect. 1609 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const 1610 { 1611 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) { 1612 return true; 1613 } 1614 1615 bool success = true; 1616 1617 if (definition.kateVersion < Version{5, 79}) { 1618 qWarning() << definition.filename << "line" << rule.line 1619 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". Please, increase kateversion."; 1620 success = false; 1621 } 1622 1623 for (QChar c : rule.additionalDeliminator) { 1624 if (!definition.wordDelimiters.contains(c)) { 1625 return success; 1626 } 1627 } 1628 1629 for (QChar c : rule.weakDeliminator) { 1630 if (definition.wordDelimiters.contains(c)) { 1631 return success; 1632 } 1633 } 1634 1635 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string; 1636 return false; 1637 } 1638 1639 //! Check that keyword rule reference an existing keyword list. 1640 bool checkKeyword(const Definition &definition, const Context::Rule &rule) const 1641 { 1642 if (rule.type == Context::Rule::Type::keyword) { 1643 auto it = definition.keywordsList.find(rule.string); 1644 if (it == definition.keywordsList.end()) { 1645 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string; 1646 return false; 1647 } 1648 } 1649 return true; 1650 } 1651 1652 //! Search for rules with lookAhead="true" and context="#stay". 1653 //! This would cause an infinite loop. 1654 bool checkLookAhead(const Context::Rule &rule) const 1655 { 1656 if (rule.lookAhead == XmlBool::True && rule.context.stay) { 1657 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay"; 1658 } 1659 return true; 1660 } 1661 1662 //! Check that StringDetect contains a placeHolder when dynamic="1" 1663 bool checkStringDetect(const Context::Rule &rule) const 1664 { 1665 if (rule.type == Context::Rule::Type::StringDetect) { 1666 // dynamic == true and no place holder? 1667 if (rule.dynamic == XmlBool::True) { 1668 static const QRegularExpression placeHolder(QStringLiteral("%\\d+")); 1669 if (!rule.string.contains(placeHolder)) { 1670 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder"; 1671 return false; 1672 } 1673 } 1674 } 1675 return true; 1676 } 1677 1678 //! Check \<include> and delimiter in a keyword list 1679 bool checkKeywordsList(const Definition &definition) const 1680 { 1681 bool success = true; 1682 1683 bool includeNotSupport = (definition.kateVersion < Version{5, 53}); 1684 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList); 1685 while (keywordsIt.hasNext()) { 1686 keywordsIt.next(); 1687 1688 for (const auto &include : keywordsIt.value().items.includes) { 1689 if (includeNotSupport) { 1690 qWarning() << definition.filename << "line" << include.line 1691 << "<include> is only available since version \"5.53\". Please, increase kateversion."; 1692 success = false; 1693 } 1694 success = checkKeywordInclude(definition, include) && success; 1695 } 1696 1697 // Check that keyword list items do not have deliminator character 1698 #if 0 1699 for (const auto& keyword : keywordsIt.value().items.keywords) { 1700 for (QChar c : keyword.content) { 1701 if (definition.wordDelimiters.contains(c)) { 1702 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content; 1703 success = false; 1704 } 1705 } 1706 } 1707 #endif 1708 } 1709 1710 return success; 1711 } 1712 1713 //! Search for non-existing keyword include. 1714 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include) const 1715 { 1716 bool containsKeywordName = true; 1717 int const idx = include.content.indexOf(QStringLiteral("##")); 1718 if (idx == -1) { 1719 auto it = definition.keywordsList.find(include.content); 1720 containsKeywordName = (it != definition.keywordsList.end()); 1721 } else { 1722 auto defName = include.content.mid(idx + 2); 1723 auto listName = include.content.left(idx); 1724 auto it = m_definitions.find(defName); 1725 if (it == m_definitions.end()) { 1726 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content; 1727 return false; 1728 } 1729 containsKeywordName = it->keywordsList.contains(listName); 1730 } 1731 1732 if (!containsKeywordName) { 1733 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content; 1734 } 1735 1736 return containsKeywordName; 1737 } 1738 1739 //! Check if a rule is hidden by another 1740 //! - rule hidden by DetectChar or AnyChar 1741 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar 1742 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings 1743 //! - duplicate rule (Int, Float, keyword with same String, etc) 1744 //! - Rule hidden by a dot regex 1745 bool checkUreachableRules(const QString &filename, 1746 const Context &context, 1747 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const 1748 { 1749 if (context.isOnlyIncluded) { 1750 return true; 1751 } 1752 1753 struct Rule4 { 1754 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr) 1755 { 1756 auto set = [&](RuleAndInclude &ruleAndInclude) { 1757 auto old = ruleAndInclude; 1758 ruleAndInclude = {&rule, includeRules}; 1759 return old; 1760 }; 1761 1762 if (rule.firstNonSpace == XmlBool::True) { 1763 return set(firstNonSpace); 1764 } else if (rule.column == 0) { 1765 return set(column0); 1766 } else if (rule.column > 0) { 1767 return set(columnGreaterThan0[rule.column]); 1768 } else { 1769 return set(normal); 1770 } 1771 } 1772 1773 private: 1774 RuleAndInclude normal; 1775 RuleAndInclude column0; 1776 QMap<int, RuleAndInclude> columnGreaterThan0; 1777 RuleAndInclude firstNonSpace; 1778 }; 1779 1780 // Associate QChar with RuleAndInclude 1781 struct CharTable { 1782 /// Search RuleAndInclude associated with @p c. 1783 RuleAndInclude find(QChar c) const 1784 { 1785 if (c.unicode() < 128) { 1786 return m_asciiMap[c.unicode()]; 1787 } 1788 auto it = m_utf8Map.find(c); 1789 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value(); 1790 } 1791 1792 /// Search RuleAndInclude associated with the characters of @p s. 1793 /// \return an empty QList when at least one character is not found. 1794 QList<RuleAndInclude> find(QStringView s) const 1795 { 1796 QList<RuleAndInclude> result; 1797 1798 for (QChar c : s) { 1799 if (!find(c)) { 1800 return result; 1801 } 1802 } 1803 1804 for (QChar c : s) { 1805 result.append(find(c)); 1806 } 1807 1808 return result; 1809 } 1810 1811 /// Associates @p c with a rule. 1812 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1813 { 1814 if (c.unicode() < 128) { 1815 m_asciiMap[c.unicode()] = {&rule, includeRule}; 1816 } else { 1817 m_utf8Map[c] = {&rule, includeRule}; 1818 } 1819 } 1820 1821 /// Associates each character of @p s with a rule. 1822 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1823 { 1824 for (QChar c : s) { 1825 append(c, rule, includeRule); 1826 } 1827 } 1828 1829 private: 1830 RuleAndInclude m_asciiMap[127]{}; 1831 QMap<QChar, RuleAndInclude> m_utf8Map; 1832 }; 1833 1834 struct Char4Tables { 1835 CharTable chars; 1836 CharTable charsColumn0; 1837 QMap<int, CharTable> charsColumnGreaterThan0; 1838 CharTable charsFirstNonSpace; 1839 }; 1840 1841 // View on Char4Tables members 1842 struct CharTableArray { 1843 // Append Char4Tables members that satisfies firstNonSpace and column. 1844 // Char4Tables::char is always added. 1845 CharTableArray(Char4Tables &tables, const Context::Rule &rule) 1846 { 1847 if (rule.firstNonSpace == XmlBool::True) { 1848 appendTable(tables.charsFirstNonSpace); 1849 } 1850 1851 if (rule.column == 0) { 1852 appendTable(tables.charsColumn0); 1853 } else if (rule.column > 0) { 1854 appendTable(tables.charsColumnGreaterThan0[rule.column]); 1855 } 1856 1857 appendTable(tables.chars); 1858 } 1859 1860 // Removes Char4Tables::chars when the rule contains firstNonSpace or column 1861 void removeNonSpecialWhenSpecial() 1862 { 1863 if (m_size > 1) { 1864 --m_size; 1865 } 1866 } 1867 1868 /// Search RuleAndInclude associated with @p c. 1869 RuleAndInclude find(QChar c) const 1870 { 1871 for (int i = 0; i < m_size; ++i) { 1872 if (auto ruleAndInclude = m_charTables[i]->find(c)) { 1873 return ruleAndInclude; 1874 } 1875 } 1876 return RuleAndInclude{nullptr, nullptr}; 1877 } 1878 1879 /// Search RuleAndInclude associated with the characters of @p s. 1880 /// \return an empty QList when at least one character is not found. 1881 QList<RuleAndInclude> find(QStringView s) const 1882 { 1883 for (int i = 0; i < m_size; ++i) { 1884 auto result = m_charTables[i]->find(s); 1885 if (result.size()) { 1886 while (++i < m_size) { 1887 result.append(m_charTables[i]->find(s)); 1888 } 1889 return result; 1890 } 1891 } 1892 return QList<RuleAndInclude>(); 1893 } 1894 1895 /// Associates @p c with a rule. 1896 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1897 { 1898 for (int i = 0; i < m_size; ++i) { 1899 m_charTables[i]->append(c, rule, includeRule); 1900 } 1901 } 1902 1903 /// Associates each character of @p s with a rule. 1904 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1905 { 1906 for (int i = 0; i < m_size; ++i) { 1907 m_charTables[i]->append(s, rule, includeRule); 1908 } 1909 } 1910 1911 private: 1912 void appendTable(CharTable &t) 1913 { 1914 m_charTables[m_size] = &t; 1915 ++m_size; 1916 } 1917 1918 CharTable *m_charTables[3]; 1919 int m_size = 0; 1920 }; 1921 1922 struct ObservableRule { 1923 const Context::Rule *rule; 1924 const Context::Rule *includeRules; 1925 1926 bool hasResolvedIncludeRules() const 1927 { 1928 return rule == includeRules; 1929 } 1930 }; 1931 1932 // Iterates over all the rules, including those in includedRules 1933 struct RuleIterator { 1934 RuleIterator(const QList<ObservableRule> &rules, const ObservableRule &endRule) 1935 : m_end(&endRule - rules.data()) 1936 , m_rules(rules) 1937 { 1938 } 1939 1940 /// \return next rule or nullptr 1941 const Context::Rule *next() 1942 { 1943 // if in includedRules 1944 if (m_includedRules) { 1945 ++m_i2; 1946 if (m_i2 != m_includedRules->size()) { 1947 return (*m_includedRules)[m_i2]; 1948 } 1949 ++m_i; 1950 m_includedRules = nullptr; 1951 } 1952 1953 // if is a includedRules 1954 while (m_i < m_end && m_rules[m_i].rule->type == Context::Rule::Type::IncludeRules) { 1955 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) { 1956 m_i2 = 0; 1957 m_includedRules = &m_rules[m_i].rule->includedRules; 1958 return (*m_includedRules)[m_i2]; 1959 } 1960 ++m_i; 1961 } 1962 1963 if (m_i < m_end) { 1964 ++m_i; 1965 return m_rules[m_i - 1].rule; 1966 } 1967 1968 return nullptr; 1969 } 1970 1971 /// \return current IncludeRules or nullptr 1972 const Context::Rule *currentIncludeRules() const 1973 { 1974 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules; 1975 } 1976 1977 private: 1978 int m_i = 0; 1979 int m_i2 = 0; 1980 const int m_end; 1981 const QList<ObservableRule> &m_rules; 1982 const QList<const Context::Rule *> *m_includedRules = nullptr; 1983 }; 1984 1985 // Dot regex container that satisfies firstNonSpace and column. 1986 struct DotRegex { 1987 /// Append a dot regex rule. 1988 void append(const Context::Rule &rule, const Context::Rule *includedRule) 1989 { 1990 auto array = extractDotRegexes(rule); 1991 if (array[0]) { 1992 *array[0] = {&rule, includedRule}; 1993 } 1994 if (array[1]) { 1995 *array[1] = {&rule, includedRule}; 1996 } 1997 } 1998 1999 /// Search dot regex which hides @p rule 2000 RuleAndInclude find(const Context::Rule &rule) 2001 { 2002 auto array = extractDotRegexes(rule); 2003 if (array[0]) { 2004 return *array[0]; 2005 } 2006 if (array[1]) { 2007 return *array[1]; 2008 } 2009 return RuleAndInclude{}; 2010 } 2011 2012 private: 2013 using Array = std::array<RuleAndInclude *, 2>; 2014 2015 Array extractDotRegexes(const Context::Rule &rule) 2016 { 2017 Array ret{}; 2018 2019 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) { 2020 ret[0] = &dotRegex; 2021 } else { 2022 if (rule.firstNonSpace == XmlBool::True) { 2023 ret[0] = &dotRegexFirstNonSpace; 2024 } 2025 2026 if (rule.column == 0) { 2027 ret[1] = &dotRegexColumn0; 2028 } else if (rule.column > 0) { 2029 ret[1] = &dotRegexColumnGreaterThan0[rule.column]; 2030 } 2031 } 2032 2033 return ret; 2034 } 2035 2036 RuleAndInclude dotRegex{}; 2037 RuleAndInclude dotRegexColumn0{}; 2038 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{}; 2039 RuleAndInclude dotRegexFirstNonSpace{}; 2040 }; 2041 2042 bool success = true; 2043 2044 // characters of DetectChar/AnyChar 2045 Char4Tables detectChars; 2046 // characters of dynamic DetectChar 2047 Char4Tables dynamicDetectChars; 2048 // characters of LineContinue 2049 Char4Tables lineContinueChars; 2050 2051 Rule4 intRule{}; 2052 Rule4 floatRule{}; 2053 Rule4 hlCCharRule{}; 2054 Rule4 hlCOctRule{}; 2055 Rule4 hlCHexRule{}; 2056 Rule4 hlCStringCharRule{}; 2057 Rule4 detectIdentifierRule{}; 2058 2059 // Contains includedRules and included includedRules 2060 QMap<Context const *, RuleAndInclude> includeContexts; 2061 2062 DotRegex dotRegex; 2063 2064 QList<ObservableRule> observedRules; 2065 observedRules.reserve(context.rules.size()); 2066 for (const Context::Rule &rule : context.rules) { 2067 const Context::Rule *includeRule = nullptr; 2068 if (rule.type == Context::Rule::Type::IncludeRules) { 2069 auto *context = rule.context.context; 2070 if (context && context->isOnlyIncluded) { 2071 includeRule = &rule; 2072 } 2073 } 2074 2075 observedRules.push_back({&rule, includeRule}); 2076 if (includeRule) { 2077 for (const Context::Rule *rule2 : rule.includedRules) { 2078 observedRules.push_back({rule2, includeRule}); 2079 } 2080 } 2081 } 2082 2083 for (auto &observedRule : observedRules) { 2084 const Context::Rule &rule = *observedRule.rule; 2085 bool isUnreachable = false; 2086 QList<RuleAndInclude> unreachableBy; 2087 2088 // declare rule as unreachable if ruleAndInclude is not empty 2089 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) { 2090 if (ruleAndInclude) { 2091 isUnreachable = true; 2092 unreachableBy.append(ruleAndInclude); 2093 } 2094 }; 2095 2096 // declare rule as unreachable if ruleAndIncludes is not empty 2097 auto updateUnreachable2 = [&](const QList<RuleAndInclude> &ruleAndIncludes) { 2098 if (!ruleAndIncludes.isEmpty()) { 2099 isUnreachable = true; 2100 unreachableBy.append(ruleAndIncludes); 2101 } 2102 }; 2103 2104 // check if rule2.firstNonSpace/column is compatible with those of rule 2105 auto isCompatible = [&rule](Context::Rule const &rule2) { 2106 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1) 2107 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True); 2108 }; 2109 2110 updateUnreachable1(dotRegex.find(rule)); 2111 2112 switch (rule.type) { 2113 // checks if hidden by DetectChar/AnyChar 2114 // then add the characters to detectChars 2115 case Context::Rule::Type::AnyChar: { 2116 auto tables = CharTableArray(detectChars, rule); 2117 updateUnreachable2(tables.find(rule.string)); 2118 tables.removeNonSpecialWhenSpecial(); 2119 tables.append(rule.string, rule); 2120 break; 2121 } 2122 2123 // check if is hidden by DetectChar/AnyChar 2124 // then add the characters to detectChars or dynamicDetectChars 2125 case Context::Rule::Type::DetectChar: { 2126 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; 2127 auto tables = CharTableArray(chars4, rule); 2128 updateUnreachable1(tables.find(rule.char0)); 2129 tables.removeNonSpecialWhenSpecial(); 2130 tables.append(rule.char0, rule); 2131 break; 2132 } 2133 2134 // check if hidden by DetectChar/AnyChar 2135 // then add spaces characters to detectChars 2136 case Context::Rule::Type::DetectSpaces: { 2137 auto tables = CharTableArray(detectChars, rule); 2138 updateUnreachable2(tables.find(QStringLiteral(" \t"))); 2139 tables.removeNonSpecialWhenSpecial(); 2140 tables.append(QLatin1Char(' '), rule); 2141 tables.append(QLatin1Char('\t'), rule); 2142 break; 2143 } 2144 2145 // check if hidden by DetectChar/AnyChar 2146 case Context::Rule::Type::HlCChar: 2147 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\''))); 2148 updateUnreachable1(hlCCharRule.setRule(rule)); 2149 break; 2150 2151 // check if hidden by DetectChar/AnyChar 2152 case Context::Rule::Type::HlCHex: 2153 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0'))); 2154 updateUnreachable1(hlCHexRule.setRule(rule)); 2155 break; 2156 2157 // check if hidden by DetectChar/AnyChar 2158 case Context::Rule::Type::HlCOct: 2159 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0'))); 2160 updateUnreachable1(hlCOctRule.setRule(rule)); 2161 break; 2162 2163 // check if hidden by DetectChar/AnyChar 2164 case Context::Rule::Type::HlCStringChar: 2165 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\'))); 2166 updateUnreachable1(hlCStringCharRule.setRule(rule)); 2167 break; 2168 2169 // check if hidden by DetectChar/AnyChar 2170 case Context::Rule::Type::Int: 2171 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789"))); 2172 updateUnreachable1(intRule.setRule(rule)); 2173 break; 2174 2175 // check if hidden by DetectChar/AnyChar 2176 case Context::Rule::Type::Float: 2177 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789."))); 2178 updateUnreachable1(floatRule.setRule(rule)); 2179 // check that Float is before Int 2180 updateUnreachable1(Rule4(intRule).setRule(rule)); 2181 break; 2182 2183 // check if hidden by another DetectIdentifier rule 2184 case Context::Rule::Type::DetectIdentifier: 2185 updateUnreachable1(detectIdentifierRule.setRule(rule)); 2186 break; 2187 2188 // check if hidden by DetectChar/AnyChar or another LineContinue 2189 case Context::Rule::Type::LineContinue: { 2190 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0)); 2191 2192 auto tables = CharTableArray(lineContinueChars, rule); 2193 updateUnreachable1(tables.find(rule.char0)); 2194 tables.removeNonSpecialWhenSpecial(); 2195 tables.append(rule.char0, rule); 2196 break; 2197 } 2198 2199 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect 2200 case Context::Rule::Type::Detect2Chars: 2201 case Context::Rule::Type::RangeDetect: 2202 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0)); 2203 if (!isUnreachable) { 2204 RuleIterator ruleIterator(observedRules, observedRule); 2205 while (const auto *rulePtr = ruleIterator.next()) { 2206 if (isUnreachable) { 2207 break; 2208 } 2209 const auto &rule2 = *rulePtr; 2210 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) { 2211 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2212 } 2213 } 2214 } 2215 break; 2216 2217 case Context::Rule::Type::RegExpr: { 2218 if (rule.isDotRegex) { 2219 dotRegex.append(rule, nullptr); 2220 break; 2221 } 2222 2223 // check that `rule` does not have another RegExpr as a prefix 2224 RuleIterator ruleIterator(observedRules, observedRule); 2225 while (const auto *rulePtr = ruleIterator.next()) { 2226 if (isUnreachable) { 2227 break; 2228 } 2229 const auto &rule2 = *rulePtr; 2230 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive 2231 && rule.dynamic == rule2.dynamic && rule.sanitizedString.startsWith(rule2.sanitizedString)) { 2232 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2); 2233 if (!add) { 2234 // \s.* (sanitized = \s) is considered hiding \s*\S 2235 // we check the quantifiers to see if this is the case 2236 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode(); 2237 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode(); 2238 auto c3 = rule2.sanitizedString.back().unicode(); 2239 if (c3 == '*' || c3 == '?' || c3 == '+') { 2240 add = true; 2241 } else if (c1 == '*' || c1 == '?') { 2242 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3)); 2243 } else { 2244 add = true; 2245 } 2246 } 2247 if (add) { 2248 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2249 } 2250 } 2251 } 2252 2253 Q_FALLTHROUGH(); 2254 } 2255 // check if a rule does not have another rule as a prefix 2256 case Context::Rule::Type::WordDetect: 2257 case Context::Rule::Type::StringDetect: { 2258 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix 2259 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) { 2260 RuleIterator ruleIterator(observedRules, observedRule); 2261 while (const auto *rulePtr = ruleIterator.next()) { 2262 if (isUnreachable) { 2263 break; 2264 } 2265 2266 const auto &rule2 = *rulePtr; 2267 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) { 2268 continue; 2269 } 2270 2271 const bool isSensitive = (rule2.insensitive == XmlBool::True); 2272 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; 2273 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) { 2274 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2275 } 2276 } 2277 } 2278 2279 // string used for comparison and truncated from "dynamic" part 2280 QStringView s = rule.string; 2281 2282 // truncate to '%' with dynamic rules 2283 if (rule.dynamic == XmlBool::True) { 2284 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)")); 2285 auto result = dynamicPosition.match(rule.string); 2286 s = s.left(result.capturedLength()); 2287 } 2288 2289 QString sanitizedRegex; 2290 // truncate to special character with RegExpr. 2291 // If regexp contains '|', `s` becomes empty. 2292 if (rule.type == Context::Rule::Type::RegExpr) { 2293 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)")); 2294 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])")); 2295 const qsizetype result = regularChars.match(rule.string).capturedLength(); 2296 const qsizetype pos = qMin(result, s.size()); 2297 if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) { 2298 sanitizedRegex = rule.string.left(qMin(result, s.size())); 2299 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1")); 2300 s = sanitizedRegex; 2301 } else { 2302 s = QStringView(); 2303 } 2304 } 2305 2306 // check if hidden by DetectChar/AnyChar 2307 if (s.size() > 0) { 2308 auto t = CharTableArray(detectChars, rule); 2309 if (rule.insensitive != XmlBool::True) { 2310 updateUnreachable1(t.find(s[0])); 2311 } else { 2312 QChar c2[]{s[0].toLower(), s[0].toUpper()}; 2313 updateUnreachable2(t.find(QStringView(c2, 2))); 2314 } 2315 } 2316 2317 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s 2318 if (s.size() > 0 && !isUnreachable) { 2319 // combination of uppercase and lowercase 2320 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}}; 2321 2322 RuleIterator ruleIterator(observedRules, observedRule); 2323 while (const auto *rulePtr = ruleIterator.next()) { 2324 if (isUnreachable) { 2325 break; 2326 } 2327 const auto &rule2 = *rulePtr; 2328 const bool isSensitive = (rule2.insensitive == XmlBool::True); 2329 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; 2330 2331 switch (rule2.type) { 2332 // check that it is not a detectChars prefix 2333 case Context::Rule::Type::Detect2Chars: 2334 if (isCompatible(rule2) && s.size() >= 2) { 2335 if (rule.insensitive != XmlBool::True) { 2336 if (rule2.char0 == s[0] && rule2.char1 == s[1]) { 2337 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2338 } 2339 } else { 2340 // when the string is case insensitive, 2341 // all 4 upper/lower case combinations must be found 2342 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) { 2343 if (!x && rule2.char0 == c1 && rule2.char0 == c2) { 2344 x = {&rule2, ruleIterator.currentIncludeRules()}; 2345 } 2346 }; 2347 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower()); 2348 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper()); 2349 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper()); 2350 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower()); 2351 2352 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2] 2353 && detect2CharsInsensitives[3]) { 2354 isUnreachable = true; 2355 unreachableBy.append(detect2CharsInsensitives[0]); 2356 unreachableBy.append(detect2CharsInsensitives[1]); 2357 unreachableBy.append(detect2CharsInsensitives[2]); 2358 unreachableBy.append(detect2CharsInsensitives[3]); 2359 } 2360 } 2361 } 2362 break; 2363 2364 // check that it is not a StringDetect prefix 2365 case Context::Rule::Type::StringDetect: 2366 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != XmlBool::True) 2367 && s.startsWith(rule2.string, caseSensitivity)) { 2368 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2369 } 2370 break; 2371 2372 // check if a WordDetect is hidden by another WordDetect 2373 case Context::Rule::Type::WordDetect: 2374 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True) 2375 && 0 == rule.string.compare(rule2.string, caseSensitivity)) { 2376 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2377 } 2378 break; 2379 2380 default:; 2381 } 2382 } 2383 } 2384 2385 break; 2386 } 2387 2388 // check if hidden by another keyword rule 2389 case Context::Rule::Type::keyword: { 2390 RuleIterator ruleIterator(observedRules, observedRule); 2391 while (const auto *rulePtr = ruleIterator.next()) { 2392 if (isUnreachable) { 2393 break; 2394 } 2395 const auto &rule2 = *rulePtr; 2396 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) { 2397 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2398 } 2399 } 2400 // TODO check that all keywords are hidden by another rules 2401 break; 2402 } 2403 2404 // add characters in those used but without checking if they are already. 2405 // <DetectChar char="}" /> 2406 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked 2407 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added 2408 // <DetectChar char="{" /> <- hidden by previous rule 2409 case Context::Rule::Type::IncludeRules: 2410 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { 2411 break; 2412 } 2413 2414 if (auto &ruleAndInclude = includeContexts[rule.context.context]) { 2415 updateUnreachable1(ruleAndInclude); 2416 } else { 2417 ruleAndInclude.rule = &rule; 2418 } 2419 2420 for (const auto *rulePtr : rule.includedIncludeRules) { 2421 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule}); 2422 } 2423 2424 if (observedRule.includeRules) { 2425 break; 2426 } 2427 2428 for (const auto *rulePtr : rule.includedRules) { 2429 const auto &rule2 = *rulePtr; 2430 switch (rule2.type) { 2431 case Context::Rule::Type::AnyChar: { 2432 auto tables = CharTableArray(detectChars, rule2); 2433 tables.removeNonSpecialWhenSpecial(); 2434 tables.append(rule2.string, rule2, &rule); 2435 break; 2436 } 2437 2438 case Context::Rule::Type::DetectChar: { 2439 auto &chars4 = (rule.dynamic != XmlBool::True) ? detectChars : dynamicDetectChars; 2440 auto tables = CharTableArray(chars4, rule2); 2441 tables.removeNonSpecialWhenSpecial(); 2442 tables.append(rule2.char0, rule2, &rule); 2443 break; 2444 } 2445 2446 case Context::Rule::Type::DetectSpaces: { 2447 auto tables = CharTableArray(detectChars, rule2); 2448 tables.removeNonSpecialWhenSpecial(); 2449 tables.append(QLatin1Char(' '), rule2, &rule); 2450 tables.append(QLatin1Char('\t'), rule2, &rule); 2451 break; 2452 } 2453 2454 case Context::Rule::Type::HlCChar: 2455 hlCCharRule.setRule(rule2, &rule); 2456 break; 2457 2458 case Context::Rule::Type::HlCHex: 2459 hlCHexRule.setRule(rule2, &rule); 2460 break; 2461 2462 case Context::Rule::Type::HlCOct: 2463 hlCOctRule.setRule(rule2, &rule); 2464 break; 2465 2466 case Context::Rule::Type::HlCStringChar: 2467 hlCStringCharRule.setRule(rule2, &rule); 2468 break; 2469 2470 case Context::Rule::Type::Int: 2471 intRule.setRule(rule2, &rule); 2472 break; 2473 2474 case Context::Rule::Type::Float: 2475 floatRule.setRule(rule2, &rule); 2476 break; 2477 2478 case Context::Rule::Type::LineContinue: { 2479 auto tables = CharTableArray(lineContinueChars, rule2); 2480 tables.removeNonSpecialWhenSpecial(); 2481 tables.append(rule2.char0, rule2, &rule); 2482 break; 2483 } 2484 2485 case Context::Rule::Type::RegExpr: 2486 if (rule2.isDotRegex) { 2487 dotRegex.append(rule2, &rule); 2488 } 2489 break; 2490 2491 case Context::Rule::Type::WordDetect: 2492 case Context::Rule::Type::StringDetect: 2493 case Context::Rule::Type::Detect2Chars: 2494 case Context::Rule::Type::IncludeRules: 2495 case Context::Rule::Type::DetectIdentifier: 2496 case Context::Rule::Type::keyword: 2497 case Context::Rule::Type::Unknown: 2498 case Context::Rule::Type::RangeDetect: 2499 break; 2500 } 2501 } 2502 break; 2503 2504 case Context::Rule::Type::Unknown: 2505 break; 2506 } 2507 2508 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { 2509 auto &unreachableIncludedRule = unreachableIncludedRules[&rule]; 2510 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) { 2511 unreachableIncludedRule.unreachableBy.append(unreachableBy); 2512 } else { 2513 unreachableIncludedRule.alwaysUnreachable = false; 2514 } 2515 } else if (isUnreachable) { 2516 success = false; 2517 QString message; 2518 message.reserve(128); 2519 for (auto &ruleAndInclude : unreachableBy) { 2520 message += QStringLiteral("line "); 2521 if (ruleAndInclude.includeRules) { 2522 message += QString::number(ruleAndInclude.includeRules->line); 2523 message += QStringLiteral(" [by '"); 2524 message += ruleAndInclude.includeRules->context.name; 2525 message += QStringLiteral("' line "); 2526 message += QString::number(ruleAndInclude.rule->line); 2527 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) { 2528 message += QStringLiteral(" ("); 2529 message += ruleAndInclude.rule->filename; 2530 message += QLatin1Char(')'); 2531 } 2532 message += QLatin1Char(']'); 2533 } else { 2534 message += QString::number(ruleAndInclude.rule->line); 2535 } 2536 message += QStringLiteral(", "); 2537 } 2538 message.chop(2); 2539 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message; 2540 } 2541 } 2542 2543 return success; 2544 } 2545 2546 //! Proposes to merge certain rule sequences 2547 //! - several DetectChar/AnyChar into AnyChar 2548 //! - several RegExpr into one RegExpr 2549 bool suggestRuleMerger(const QString &filename, const Context &context) const 2550 { 2551 bool success = true; 2552 2553 if (context.rules.isEmpty()) { 2554 return success; 2555 } 2556 2557 auto it = context.rules.begin(); 2558 const auto end = context.rules.end() - 1; 2559 2560 for (; it < end; ++it) { 2561 auto &rule1 = *it; 2562 auto &rule2 = it[1]; 2563 2564 auto isCommonCompatible = [&] { 2565 if (rule1.lookAhead != rule2.lookAhead) { 2566 return false; 2567 } 2568 // ignore attribute when lookAhead is true 2569 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) { 2570 return false; 2571 } 2572 // clang-format off 2573 return rule1.beginRegion == rule2.beginRegion 2574 && rule1.endRegion == rule2.endRegion 2575 && rule1.firstNonSpace == rule2.firstNonSpace 2576 && rule1.context.context == rule2.context.context 2577 && rule1.context.popCount == rule2.context.popCount; 2578 // clang-format on 2579 }; 2580 2581 switch (rule1.type) { 2582 // request to merge AnyChar/DetectChar 2583 case Context::Rule::Type::AnyChar: 2584 case Context::Rule::Type::DetectChar: 2585 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible() 2586 && rule1.column == rule2.column) { 2587 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule"; 2588 success = false; 2589 } 2590 break; 2591 2592 // request to merge multiple RegExpr 2593 case Context::Rule::Type::RegExpr: 2594 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic 2595 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) { 2596 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule"; 2597 success = false; 2598 } 2599 break; 2600 2601 case Context::Rule::Type::DetectSpaces: 2602 case Context::Rule::Type::HlCChar: 2603 case Context::Rule::Type::HlCHex: 2604 case Context::Rule::Type::HlCOct: 2605 case Context::Rule::Type::HlCStringChar: 2606 case Context::Rule::Type::Int: 2607 case Context::Rule::Type::Float: 2608 case Context::Rule::Type::LineContinue: 2609 case Context::Rule::Type::WordDetect: 2610 case Context::Rule::Type::StringDetect: 2611 case Context::Rule::Type::Detect2Chars: 2612 case Context::Rule::Type::IncludeRules: 2613 case Context::Rule::Type::DetectIdentifier: 2614 case Context::Rule::Type::keyword: 2615 case Context::Rule::Type::Unknown: 2616 case Context::Rule::Type::RangeDetect: 2617 break; 2618 } 2619 } 2620 2621 return success; 2622 } 2623 2624 //! Initialize the referenced context (ContextName::context) 2625 //! Some input / output examples are: 2626 //! - "#stay" -> "" 2627 //! - "#pop" -> "" 2628 //! - "Comment" -> "Comment" 2629 //! - "#pop!Comment" -> "Comment" 2630 //! - "##ISO C++" -> "" 2631 //! - "Comment##ISO C++"-> "Comment" in ISO C++ 2632 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line) 2633 { 2634 QStringView name = contextName.name; 2635 if (name.isEmpty()) { 2636 contextName.stay = true; 2637 } else if (name.startsWith(QStringLiteral("#stay"))) { 2638 name = name.mid(5); 2639 contextName.stay = true; 2640 contextName.context = &context; 2641 if (!name.isEmpty()) { 2642 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name; 2643 m_success = false; 2644 } 2645 } else { 2646 while (name.startsWith(QStringLiteral("#pop"))) { 2647 name = name.mid(4); 2648 ++contextName.popCount; 2649 } 2650 2651 if (contextName.popCount && !name.isEmpty()) { 2652 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) { 2653 name = name.mid(1); 2654 } else { 2655 qWarning() << definition.filename << "line" << line << "'!' missing between '#pop' and context name" << context.name; 2656 m_success = false; 2657 } 2658 } 2659 2660 if (!name.isEmpty()) { 2661 const int idx = name.indexOf(QStringLiteral("##")); 2662 if (idx == -1) { 2663 auto it = definition.contexts.find(name.toString()); 2664 if (it != definition.contexts.end()) { 2665 contextName.context = &*it; 2666 } 2667 } else { 2668 auto defName = name.mid(idx + 2); 2669 auto it = m_definitions.find(defName.toString()); 2670 if (it != m_definitions.end()) { 2671 auto listName = name.left(idx).toString(); 2672 definition.referencedDefinitions.insert(&*it); 2673 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName); 2674 if (ctxIt != it->contexts.end()) { 2675 contextName.context = &*ctxIt; 2676 } 2677 } else { 2678 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name; 2679 m_success = false; 2680 } 2681 } 2682 2683 if (!contextName.context) { 2684 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name; 2685 m_success = false; 2686 } 2687 } 2688 } 2689 } 2690 2691 QMap<QString, Definition> m_definitions; 2692 Definition *m_currentDefinition = nullptr; 2693 Keywords *m_currentKeywords = nullptr; 2694 Context *m_currentContext = nullptr; 2695 bool m_success = true; 2696 }; 2697 2698 namespace 2699 { 2700 QStringList readListing(const QString &fileName) 2701 { 2702 QFile file(fileName); 2703 if (!file.open(QIODevice::ReadOnly)) { 2704 return QStringList(); 2705 } 2706 2707 QXmlStreamReader xml(&file); 2708 QStringList listing; 2709 while (!xml.atEnd()) { 2710 xml.readNext(); 2711 2712 // add only .xml files, no .json or stuff 2713 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) { 2714 listing.append(xml.text().toString()); 2715 } 2716 } 2717 2718 if (xml.hasError()) { 2719 qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << xml.characterOffset(); 2720 listing.clear(); 2721 } 2722 2723 return listing; 2724 } 2725 2726 /** 2727 * check if the "extensions" attribute have valid wildcards 2728 * @param extensions extensions string to check 2729 * @return valid? 2730 */ 2731 bool checkExtensions(QStringView extensions) 2732 { 2733 // get list of extensions 2734 const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts); 2735 2736 // ok if empty 2737 if (extensionParts.isEmpty()) { 2738 return true; 2739 } 2740 2741 // check that only valid wildcard things are inside the parts 2742 for (const auto &extension : extensionParts) { 2743 for (const auto c : extension) { 2744 // eat normal things 2745 if (c.isDigit() || c.isLetter()) { 2746 continue; 2747 } 2748 2749 // allow some special characters 2750 if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) { 2751 continue; 2752 } 2753 2754 // only allowed wildcard things: '?' and '*' 2755 if (c == QLatin1Char('?') || c == QLatin1Char('*')) { 2756 continue; 2757 } 2758 2759 qWarning() << "invalid character" << c << "seen in extensions wildcard"; 2760 return false; 2761 } 2762 } 2763 2764 // all checks passed 2765 return true; 2766 } 2767 2768 } 2769 2770 int main(int argc, char *argv[]) 2771 { 2772 // get app instance 2773 QCoreApplication app(argc, argv); 2774 2775 // ensure enough arguments are passed 2776 if (app.arguments().size() < 3) { 2777 return 1; 2778 } 2779 2780 #ifdef HAS_XERCESC 2781 // care for proper init and cleanup 2782 XMLPlatformUtils::Initialize(); 2783 auto cleanup = qScopeGuard(XMLPlatformUtils::Terminate); 2784 2785 /* 2786 * parse XSD first time and cache it 2787 */ 2788 XMLGrammarPoolImpl xsd(XMLPlatformUtils::fgMemoryManager); 2789 2790 // create parser for the XSD 2791 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd); 2792 init_parser(parser); 2793 QString messages; 2794 CustomErrorHandler eh(&messages); 2795 parser.setErrorHandler(&eh); 2796 2797 // load grammar into the pool, on error just abort 2798 const auto xsdFile = app.arguments().at(2); 2799 if (!parser.loadGrammar((const char16_t *)xsdFile.utf16(), Grammar::SchemaGrammarType, true) || eh.failed()) { 2800 qWarning("Failed to parse XSD %s: %s", qPrintable(xsdFile), qPrintable(messages)); 2801 return 2; 2802 } 2803 2804 // lock the pool, no later modifications wanted! 2805 xsd.lockPool(); 2806 #endif 2807 2808 const QString hlFilenamesListing = app.arguments().value(3); 2809 if (hlFilenamesListing.isEmpty()) { 2810 return 1; 2811 } 2812 2813 QStringList hlFilenames = readListing(hlFilenamesListing); 2814 if (hlFilenames.isEmpty()) { 2815 qWarning("Failed to read %s", qPrintable(hlFilenamesListing)); 2816 return 3; 2817 } 2818 2819 // text attributes 2820 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("section") << QStringLiteral("mimetype") 2821 << QStringLiteral("extensions") << QStringLiteral("style") << QStringLiteral("author") 2822 << QStringLiteral("license") << QStringLiteral("indenter"); 2823 2824 // index all given highlightings 2825 HlFilesChecker filesChecker; 2826 QVariantMap hls; 2827 int anyError = 0; 2828 for (const QString &hlFilename : std::as_const(hlFilenames)) { 2829 QFile hlFile(hlFilename); 2830 if (!hlFile.open(QIODevice::ReadOnly)) { 2831 qWarning("Failed to open %s", qPrintable(hlFilename)); 2832 anyError = 3; 2833 continue; 2834 } 2835 2836 #ifdef HAS_XERCESC 2837 // create parser 2838 SAX2XMLReaderImpl parser(XMLPlatformUtils::fgMemoryManager, &xsd); 2839 init_parser(parser); 2840 QString messages; 2841 CustomErrorHandler eh(&messages); 2842 parser.setErrorHandler(&eh); 2843 2844 // parse the XML file 2845 parser.parse((const char16_t *)hlFile.fileName().utf16()); 2846 2847 // report issues 2848 if (eh.failed()) { 2849 qWarning("Failed to validate XML %s: %s", qPrintable(hlFile.fileName()), qPrintable(messages)); 2850 anyError = 4; 2851 continue; 2852 } 2853 #endif 2854 2855 // read the needed attributes from toplevel language tag 2856 hlFile.reset(); 2857 QXmlStreamReader xml(&hlFile); 2858 if (xml.readNextStartElement()) { 2859 if (xml.name() != QLatin1String("language")) { 2860 anyError = 5; 2861 continue; 2862 } 2863 } else { 2864 anyError = 6; 2865 continue; 2866 } 2867 2868 // map to store hl info 2869 QVariantMap hl; 2870 2871 // transfer text attributes 2872 for (const QString &attribute : std::as_const(textAttributes)) { 2873 hl[attribute] = xml.attributes().value(attribute).toString(); 2874 } 2875 2876 // check if extensions have the right format 2877 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) { 2878 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString(); 2879 anyError = 23; 2880 } 2881 2882 // numerical attributes 2883 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt(); 2884 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt(); 2885 2886 // add boolean one 2887 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden"))); 2888 2889 // keep some strings as UTF-8 for faster translations 2890 hl[QStringLiteral("nameUtf8")] = hl[QStringLiteral("name")].toString().toUtf8(); 2891 hl[QStringLiteral("sectionUtf8")] = hl[QStringLiteral("section")].toString().toUtf8(); 2892 2893 // remember hl 2894 hls[QFileInfo(hlFile).fileName()] = hl; 2895 2896 const QString hlName = hl[QStringLiteral("name")].toString(); 2897 2898 filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName); 2899 2900 // scan for broken regex or keywords with spaces 2901 while (!xml.atEnd()) { 2902 xml.readNext(); 2903 filesChecker.processElement(xml); 2904 } 2905 2906 if (xml.hasError()) { 2907 anyError = 33; 2908 qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset(); 2909 } 2910 } 2911 2912 filesChecker.resolveContexts(); 2913 2914 if (!filesChecker.check()) { 2915 anyError = 7; 2916 } 2917 2918 // bail out if any problem was seen 2919 if (anyError) { 2920 return anyError; 2921 } 2922 2923 // create outfile, after all has worked! 2924 QFile outFile(app.arguments().at(1)); 2925 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) { 2926 return 9; 2927 } 2928 2929 // write out json 2930 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor()); 2931 2932 // be done 2933 return 0; 2934 }