// File indexing completed on 2024-05-12 15:50:01
0001 /* 0002 SPDX-FileCopyrightText: 2014 Christoph Cullmann <cullmann@kde.org> 0003 SPDX-FileCopyrightText: 2020 Jonathan Poelen <jonathan.poelen@gmail.com> 0004 0005 SPDX-License-Identifier: MIT 0006 */ 0007 0008 #include <QCborValue> 0009 #include <QCoreApplication> 0010 #include <QDebug> 0011 #include <QFile> 0012 #include <QFileInfo> 0013 #include <QMutableMapIterator> 0014 #include <QRegularExpression> 0015 #include <QVariant> 0016 #include <QXmlStreamReader> 0017 0018 #ifdef QT_XMLPATTERNS_LIB 0019 #include <QXmlSchema> 0020 #include <QXmlSchemaValidator> 0021 #endif 0022 0023 #include "../lib/worddelimiters_p.h" 0024 #include "../lib/xml_p.h" 0025 0026 #include <array> 0027 0028 using KSyntaxHighlighting::WordDelimiters; 0029 using KSyntaxHighlighting::Xml::attrToBool; 0030 0031 class HlFilesChecker 0032 { 0033 public: 0034 template<typename T> 0035 void setDefinition(const T &verStr, const QString &filename, const QString &name) 0036 { 0037 m_currentDefinition = &*m_definitions.insert(name, Definition{}); 0038 m_currentDefinition->languageName = name; 0039 m_currentDefinition->filename = filename; 0040 m_currentDefinition->kateVersionStr = verStr.toString(); 0041 m_currentKeywords = nullptr; 0042 m_currentContext = nullptr; 0043 0044 const auto idx = verStr.indexOf(QLatin1Char('.')); 0045 if (idx <= 0) { 0046 qWarning() << filename << "invalid kateversion" << verStr; 0047 m_success = false; 0048 } else { 0049 m_currentDefinition->kateVersion = {verStr.left(idx).toInt(), verStr.mid(idx + 1).toInt()}; 0050 } 0051 } 0052 0053 void processElement(QXmlStreamReader &xml) 0054 { 0055 if (xml.isStartElement()) { 0056 if (m_currentContext) { 0057 m_currentContext->rules.push_back(Context::Rule{}); 0058 auto &rule = m_currentContext->rules.back(); 0059 m_success = rule.parseElement(m_currentDefinition->filename, xml) && m_success; 0060 m_currentContext->hasDynamicRule = m_currentContext->hasDynamicRule || rule.dynamic == XmlBool::True; 0061 } else if 
(m_currentKeywords) { 0062 m_success = m_currentKeywords->items.parseElement(m_currentDefinition->filename, xml) && m_success; 0063 } else if (xml.name() == QStringLiteral("context")) { 0064 processContextElement(xml); 0065 } else if (xml.name() == QStringLiteral("list")) { 0066 processListElement(xml); 0067 } else if (xml.name() == QStringLiteral("keywords")) { 0068 m_success = m_currentDefinition->parseKeywords(xml) && m_success; 0069 } else if (xml.name() == QStringLiteral("emptyLine")) { 0070 m_success = parseEmptyLine(m_currentDefinition->filename, xml) && m_success; 0071 } else if (xml.name() == QStringLiteral("itemData")) { 0072 m_success = m_currentDefinition->itemDatas.parseElement(m_currentDefinition->filename, xml) && m_success; 0073 } 0074 } else if (xml.isEndElement()) { 0075 if (m_currentContext && xml.name() == QStringLiteral("context")) { 0076 m_currentContext = nullptr; 0077 } else if (m_currentKeywords && xml.name() == QStringLiteral("list")) { 0078 m_currentKeywords = nullptr; 0079 } 0080 } 0081 } 0082 0083 //! 
Resolve context attribute and include tag 0084 void resolveContexts() 0085 { 0086 QMutableMapIterator<QString, Definition> def(m_definitions); 0087 while (def.hasNext()) { 0088 def.next(); 0089 auto &definition = def.value(); 0090 auto &contexts = definition.contexts; 0091 0092 if (contexts.isEmpty()) { 0093 qWarning() << definition.filename << "has no context"; 0094 m_success = false; 0095 continue; 0096 } 0097 0098 auto markAsUsedContext = [](ContextName &contextName) { 0099 if (!contextName.stay && contextName.context) { 0100 contextName.context->isOnlyIncluded = false; 0101 } 0102 }; 0103 0104 QMutableMapIterator<QString, Context> contextIt(contexts); 0105 while (contextIt.hasNext()) { 0106 contextIt.next(); 0107 auto &context = contextIt.value(); 0108 resolveContextName(definition, context, context.lineEndContext, context.line); 0109 resolveContextName(definition, context, context.lineEmptyContext, context.line); 0110 resolveContextName(definition, context, context.fallthroughContext, context.line); 0111 markAsUsedContext(context.lineEndContext); 0112 markAsUsedContext(context.lineEmptyContext); 0113 markAsUsedContext(context.fallthroughContext); 0114 for (auto &rule : context.rules) { 0115 rule.parentContext = &context; 0116 resolveContextName(definition, context, rule.context, rule.line); 0117 if (rule.type != Context::Rule::Type::IncludeRules) { 0118 markAsUsedContext(rule.context); 0119 } else if (rule.includeAttrib == XmlBool::True && rule.context.context) { 0120 rule.context.context->referencedWithIncludeAttrib = true; 0121 } 0122 } 0123 } 0124 0125 auto *firstContext = &*definition.contexts.find(definition.firstContextName); 0126 firstContext->isOnlyIncluded = false; 0127 definition.firstContext = firstContext; 0128 } 0129 0130 resolveIncludeRules(); 0131 } 0132 0133 bool check() const 0134 { 0135 bool success = m_success; 0136 0137 const auto usedContexts = extractUsedContexts(); 0138 0139 QMap<const Definition *, const Definition *> 
maxVersionByDefinitions; 0140 QMap<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRules; 0141 0142 QMapIterator<QString, Definition> def(m_definitions); 0143 while (def.hasNext()) { 0144 def.next(); 0145 const auto &definition = def.value(); 0146 const auto &filename = definition.filename; 0147 0148 auto *maxDef = maxKateVersionDefinition(definition, maxVersionByDefinitions); 0149 if (maxDef != &definition) { 0150 qWarning() << definition.filename << "depends on a language" << maxDef->languageName << "in version" << maxDef->kateVersionStr 0151 << ". Please, increase kateversion."; 0152 success = false; 0153 } 0154 0155 QSet<const Keywords *> referencedKeywords; 0156 QSet<ItemDatas::Style> usedAttributeNames; 0157 QSet<ItemDatas::Style> ignoredAttributeNames; 0158 success = checkKeywordsList(definition, referencedKeywords) && success; 0159 success = 0160 checkContexts(definition, referencedKeywords, usedAttributeNames, ignoredAttributeNames, usedContexts, unreachableIncludedRules) && success; 0161 0162 // search for non-existing itemDatas. 0163 const auto invalidNames = usedAttributeNames - definition.itemDatas.styleNames; 0164 for (const auto &styleName : invalidNames) { 0165 qWarning() << filename << "line" << styleName.line << "reference of non-existing itemData attributes:" << styleName.name; 0166 success = false; 0167 } 0168 0169 // search for existing itemDatas, but unusable. 0170 const auto ignoredNames = ignoredAttributeNames - usedAttributeNames; 0171 for (const auto &styleName : ignoredNames) { 0172 qWarning() << filename << "line" << styleName.line << "attribute" << styleName.name 0173 << "is never used. All uses are with lookAhead=true or <IncludeRules/>"; 0174 success = false; 0175 } 0176 0177 // search for unused itemDatas. 
0178 auto unusedNames = definition.itemDatas.styleNames - usedAttributeNames; 0179 unusedNames -= ignoredNames; 0180 for (const auto &styleName : std::as_const(unusedNames)) { 0181 qWarning() << filename << "line" << styleName.line << "unused itemData:" << styleName.name; 0182 success = false; 0183 } 0184 } 0185 0186 QMutableMapIterator<const Context::Rule *, IncludedRuleUnreachableBy> unreachableIncludedRuleIt(unreachableIncludedRules); 0187 while (unreachableIncludedRuleIt.hasNext()) { 0188 unreachableIncludedRuleIt.next(); 0189 IncludedRuleUnreachableBy &unreachableRulesBy = unreachableIncludedRuleIt.value(); 0190 if (unreachableRulesBy.alwaysUnreachable) { 0191 auto *rule = unreachableIncludedRuleIt.key(); 0192 0193 if (!rule->parentContext->isOnlyIncluded) { 0194 continue; 0195 } 0196 0197 // remove duplicates rules 0198 QSet<const Context::Rule *> rules; 0199 auto &unreachableBy = unreachableRulesBy.unreachableBy; 0200 unreachableBy.erase(std::remove_if(unreachableBy.begin(), 0201 unreachableBy.end(), 0202 [&](const RuleAndInclude &ruleAndInclude) { 0203 if (rules.contains(ruleAndInclude.rule)) { 0204 return true; 0205 } 0206 rules.insert(ruleAndInclude.rule); 0207 return false; 0208 }), 0209 unreachableBy.end()); 0210 0211 QString message; 0212 message.reserve(128); 0213 for (auto &ruleAndInclude : std::as_const(unreachableBy)) { 0214 message += QStringLiteral("line "); 0215 message += QString::number(ruleAndInclude.rule->line); 0216 message += QStringLiteral(" ["); 0217 message += ruleAndInclude.rule->parentContext->name; 0218 if (rule->filename != ruleAndInclude.rule->filename) { 0219 message += QStringLiteral(" ("); 0220 message += ruleAndInclude.rule->filename; 0221 message += QLatin1Char(')'); 0222 } 0223 if (ruleAndInclude.includeRules) { 0224 message += QStringLiteral(" via line "); 0225 message += QString::number(ruleAndInclude.includeRules->line); 0226 } 0227 message += QStringLiteral("], "); 0228 } 0229 message.chop(2); 0230 0231 qWarning() << 
rule->filename << "line" << rule->line << "no IncludeRule can reach this rule, hidden by" << message; 0232 success = false; 0233 } 0234 } 0235 0236 return success; 0237 } 0238 0239 private: 0240 enum class XmlBool { 0241 Unspecified, 0242 False, 0243 True, 0244 }; 0245 0246 struct Context; 0247 0248 struct ContextName { 0249 QString name; 0250 int popCount = 0; 0251 bool stay = false; 0252 0253 Context *context = nullptr; 0254 }; 0255 0256 struct Parser { 0257 const QString &filename; 0258 QXmlStreamReader &xml; 0259 QXmlStreamAttribute &attr; 0260 bool success; 0261 0262 //! Read a string type attribute, \c success = \c false when \p str is not empty 0263 //! \return \c true when attr.name() == attrName, otherwise false 0264 bool extractString(QString &str, const QString &attrName) 0265 { 0266 if (attr.name() != attrName) { 0267 return false; 0268 } 0269 0270 str = attr.value().toString(); 0271 if (str.isEmpty()) { 0272 qWarning() << filename << "line" << xml.lineNumber() << attrName << "attribute is empty"; 0273 success = false; 0274 } 0275 0276 return true; 0277 } 0278 0279 //! Read a bool type attribute, \c success = \c false when \p xmlBool is not \c XmlBool::Unspecified. 0280 //! \return \c true when attr.name() == attrName, otherwise false 0281 bool extractXmlBool(XmlBool &xmlBool, const QString &attrName) 0282 { 0283 if (attr.name() != attrName) { 0284 return false; 0285 } 0286 0287 xmlBool = attr.value().isNull() ? XmlBool::Unspecified : attrToBool(attr.value()) ? XmlBool::True : XmlBool::False; 0288 0289 return true; 0290 } 0291 0292 //! Read a positive integer type attribute, \c success = \c false when \p positive is already greater than or equal to 0 0293 //! 
\return \c true when attr.name() == attrName, otherwise false 0294 bool extractPositive(int &positive, const QString &attrName) 0295 { 0296 if (attr.name() != attrName) { 0297 return false; 0298 } 0299 0300 bool ok = true; 0301 positive = attr.value().toInt(&ok); 0302 0303 if (!ok || positive < 0) { 0304 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a positive integer:" << attr.value(); 0305 success = false; 0306 } 0307 0308 return true; 0309 } 0310 0311 //! Read a color, \c success = \c false when \p color is already greater than or equal to 0 0312 //! \return \c true when attr.name() == attrName, otherwise false 0313 bool checkColor(const QString &attrName) 0314 { 0315 if (attr.name() != attrName) { 0316 return false; 0317 } 0318 0319 const auto value = attr.value(); 0320 if (value.isEmpty() /*|| QColor(value).isValid()*/) { 0321 qWarning() << filename << "line" << xml.lineNumber() << attrName << "should be a color:" << value; 0322 success = false; 0323 } 0324 0325 return true; 0326 } 0327 0328 //! Read a QChar, \c success = \c false when \p c is not \c '\0' or does not have one char 0329 //! \return \c true when attr.name() == attrName, otherwise false 0330 bool extractChar(QChar &c, const QString &attrName) 0331 { 0332 if (attr.name() != attrName) { 0333 return false; 0334 } 0335 0336 if (attr.value().size() == 1) { 0337 c = attr.value()[0]; 0338 } else { 0339 c = QLatin1Char('_'); 0340 qWarning() << filename << "line" << xml.lineNumber() << attrName << "must contain exactly one char:" << attr.value(); 0341 success = false; 0342 } 0343 0344 return true; 0345 } 0346 0347 //! 
\return parsing status when \p isExtracted is \c true, otherwise \c false 0348 bool checkIfExtracted(bool isExtracted) 0349 { 0350 if (isExtracted) { 0351 return success; 0352 } 0353 0354 qWarning() << filename << "line" << xml.lineNumber() << "unknown attribute:" << attr.name(); 0355 return false; 0356 } 0357 }; 0358 0359 struct Keywords { 0360 struct Items { 0361 struct Item { 0362 QString content; 0363 int line; 0364 0365 friend uint qHash(const Item &item, uint seed = 0) 0366 { 0367 return qHash(item.content, seed); 0368 } 0369 0370 friend bool operator==(const Item &item0, const Item &item1) 0371 { 0372 return item0.content == item1.content; 0373 } 0374 }; 0375 0376 QVector<Item> keywords; 0377 QSet<Item> includes; 0378 0379 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0380 { 0381 bool success = true; 0382 0383 const int line = xml.lineNumber(); 0384 QString content = xml.readElementText(); 0385 0386 if (content.isEmpty()) { 0387 qWarning() << filename << "line" << line << "is empty:" << xml.name(); 0388 success = false; 0389 } 0390 0391 if (xml.name() == QStringLiteral("include")) { 0392 includes.insert({content, line}); 0393 } else if (xml.name() == QStringLiteral("item")) { 0394 keywords.append({content, line}); 0395 } else { 0396 qWarning() << filename << "line" << line << "invalid element:" << xml.name(); 0397 success = false; 0398 } 0399 0400 return success; 0401 } 0402 }; 0403 0404 QString name; 0405 Items items; 0406 int line; 0407 0408 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0409 { 0410 line = xml.lineNumber(); 0411 0412 bool success = true; 0413 for (auto &attr : xml.attributes()) { 0414 Parser parser{filename, xml, attr, success}; 0415 0416 const bool isExtracted = parser.extractString(name, QStringLiteral("name")); 0417 0418 success = parser.checkIfExtracted(isExtracted); 0419 } 0420 return success; 0421 } 0422 }; 0423 0424 struct Context { 0425 struct Rule { 0426 enum class Type { 0427 Unknown, 0428 
AnyChar, 0429 Detect2Chars, 0430 DetectChar, 0431 DetectIdentifier, 0432 DetectSpaces, 0433 Float, 0434 HlCChar, 0435 HlCHex, 0436 HlCOct, 0437 HlCStringChar, 0438 IncludeRules, 0439 Int, 0440 LineContinue, 0441 RangeDetect, 0442 RegExpr, 0443 StringDetect, 0444 WordDetect, 0445 keyword, 0446 }; 0447 0448 Type type{}; 0449 0450 bool isDotRegex = false; 0451 int line = -1; 0452 0453 // commonAttributes 0454 QString attribute; 0455 ContextName context; 0456 QString beginRegion; 0457 QString endRegion; 0458 int column = -1; 0459 XmlBool lookAhead{}; 0460 XmlBool firstNonSpace{}; 0461 0462 // StringDetect, WordDetect, keyword 0463 XmlBool insensitive{}; 0464 0465 // DetectChar, StringDetect, RegExpr, keyword 0466 XmlBool dynamic{}; 0467 0468 // Regex 0469 XmlBool minimal{}; 0470 0471 // IncludeRule 0472 XmlBool includeAttrib{}; 0473 0474 // DetectChar, Detect2Chars, LineContinue, RangeDetect 0475 QChar char0; 0476 // Detect2Chars, RangeDetect 0477 QChar char1; 0478 0479 // AnyChar, DetectChar, StringDetect, RegExpr, WordDetect, keyword 0480 QString string; 0481 // RegExpr without .* as suffix 0482 QString sanitizedString; 0483 0484 // Float, HlCHex, HlCOct, Int, WordDetect, keyword 0485 QString additionalDeliminator; 0486 QString weakDeliminator; 0487 0488 // rules included by IncludeRules (without IncludeRule) 0489 QVector<const Rule *> includedRules; 0490 0491 // IncludeRules included by IncludeRules 0492 QSet<const Rule *> includedIncludeRules; 0493 0494 Context const *parentContext = nullptr; 0495 0496 QString filename; 0497 0498 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0499 { 0500 this->filename = filename; 0501 line = xml.lineNumber(); 0502 0503 using Pair = QPair<QString, Type>; 0504 static const auto pairs = { 0505 Pair{QStringLiteral("AnyChar"), Type::AnyChar}, 0506 Pair{QStringLiteral("Detect2Chars"), Type::Detect2Chars}, 0507 Pair{QStringLiteral("DetectChar"), Type::DetectChar}, 0508 Pair{QStringLiteral("DetectIdentifier"), 
Type::DetectIdentifier}, 0509 Pair{QStringLiteral("DetectSpaces"), Type::DetectSpaces}, 0510 Pair{QStringLiteral("Float"), Type::Float}, 0511 Pair{QStringLiteral("HlCChar"), Type::HlCChar}, 0512 Pair{QStringLiteral("HlCHex"), Type::HlCHex}, 0513 Pair{QStringLiteral("HlCOct"), Type::HlCOct}, 0514 Pair{QStringLiteral("HlCStringChar"), Type::HlCStringChar}, 0515 Pair{QStringLiteral("IncludeRules"), Type::IncludeRules}, 0516 Pair{QStringLiteral("Int"), Type::Int}, 0517 Pair{QStringLiteral("LineContinue"), Type::LineContinue}, 0518 Pair{QStringLiteral("RangeDetect"), Type::RangeDetect}, 0519 Pair{QStringLiteral("RegExpr"), Type::RegExpr}, 0520 Pair{QStringLiteral("StringDetect"), Type::StringDetect}, 0521 Pair{QStringLiteral("WordDetect"), Type::WordDetect}, 0522 Pair{QStringLiteral("keyword"), Type::keyword}, 0523 }; 0524 0525 for (auto pair : pairs) { 0526 if (xml.name() == pair.first) { 0527 type = pair.second; 0528 bool success = parseAttributes(filename, xml); 0529 success = checkMandoryAttributes(filename, xml) && success; 0530 if (success && type == Type::RegExpr) { 0531 // ., (.) followed by *, +, {1} or nothing 0532 static const QRegularExpression isDot(QStringLiteral(R"(^\(?\.(?:[*+][*+?]?|[*+]|\{1\})?\$?$)")); 0533 // remove "(?:" and ")" 0534 static const QRegularExpression removeParentheses(QStringLiteral(R"(\((?:\?:)?|\))")); 0535 // remove parentheses on a copy of string 0536 auto reg = QString(string).replace(removeParentheses, QString()); 0537 isDotRegex = reg.contains(isDot); 0538 0539 // Remove .* and .*$ suffix. 
0540 static const QRegularExpression allSuffix(QStringLiteral("(?<!\\\\)[.][*][?+]?[$]?$")); 0541 sanitizedString = string; 0542 sanitizedString.replace(allSuffix, QString()); 0543 // string is a catch-all, do not sanitize 0544 if (sanitizedString.isEmpty() || sanitizedString == QStringLiteral("^")) { 0545 sanitizedString = string; 0546 } 0547 } 0548 return success; 0549 } 0550 } 0551 0552 qWarning() << filename << "line" << xml.lineNumber() << "unknown element:" << xml.name(); 0553 return false; 0554 } 0555 0556 private: 0557 bool parseAttributes(const QString &filename, QXmlStreamReader &xml) 0558 { 0559 bool success = true; 0560 0561 for (auto &attr : xml.attributes()) { 0562 Parser parser{filename, xml, attr, success}; 0563 0564 // clang-format off 0565 const bool isExtracted 0566 = parser.extractString(attribute, QStringLiteral("attribute")) 0567 || parser.extractString(context.name, QStringLiteral("context")) 0568 || parser.extractXmlBool(lookAhead, QStringLiteral("lookAhead")) 0569 || parser.extractXmlBool(firstNonSpace, QStringLiteral("firstNonSpace")) 0570 || parser.extractString(beginRegion, QStringLiteral("beginRegion")) 0571 || parser.extractString(endRegion, QStringLiteral("endRegion")) 0572 || parser.extractPositive(column, QStringLiteral("column")) 0573 || ((type == Type::RegExpr 0574 || type == Type::StringDetect 0575 || type == Type::WordDetect 0576 || type == Type::keyword 0577 ) && parser.extractXmlBool(insensitive, QStringLiteral("insensitive"))) 0578 || ((type == Type::DetectChar 0579 || type == Type::RegExpr 0580 || type == Type::StringDetect 0581 || type == Type::keyword 0582 ) && parser.extractXmlBool(dynamic, QStringLiteral("dynamic"))) 0583 || ((type == Type::RegExpr) 0584 && parser.extractXmlBool(minimal, QStringLiteral("minimal"))) 0585 || ((type == Type::DetectChar 0586 || type == Type::Detect2Chars 0587 || type == Type::LineContinue 0588 || type == Type::RangeDetect 0589 ) && parser.extractChar(char0, QStringLiteral("char"))) 0590 || 
((type == Type::Detect2Chars 0591 || type == Type::RangeDetect 0592 ) && parser.extractChar(char1, QStringLiteral("char1"))) 0593 || ((type == Type::AnyChar 0594 || type == Type::RegExpr 0595 || type == Type::StringDetect 0596 || type == Type::WordDetect 0597 || type == Type::keyword 0598 ) && parser.extractString(string, QStringLiteral("String"))) 0599 || ((type == Type::IncludeRules) 0600 && parser.extractXmlBool(includeAttrib, QStringLiteral("includeAttrib"))) 0601 || ((type == Type::Float 0602 || type == Type::HlCHex 0603 || type == Type::HlCOct 0604 || type == Type::Int 0605 || type == Type::keyword 0606 || type == Type::WordDetect 0607 ) && (parser.extractString(additionalDeliminator, QStringLiteral("additionalDeliminator")) 0608 || parser.extractString(weakDeliminator, QStringLiteral("weakDeliminator")))) 0609 ; 0610 // clang-format on 0611 0612 success = parser.checkIfExtracted(isExtracted); 0613 0614 if (type == Type::LineContinue && char0 == QLatin1Char('\0')) { 0615 char0 = QLatin1Char('\\'); 0616 } 0617 } 0618 0619 return success; 0620 } 0621 0622 bool checkMandoryAttributes(const QString &filename, QXmlStreamReader &xml) 0623 { 0624 QString missingAttr; 0625 0626 switch (type) { 0627 case Type::Unknown: 0628 return false; 0629 0630 case Type::AnyChar: 0631 case Type::RegExpr: 0632 case Type::StringDetect: 0633 case Type::WordDetect: 0634 case Type::keyword: 0635 missingAttr = string.isEmpty() ? QStringLiteral("String") : QString(); 0636 break; 0637 0638 case Type::DetectChar: 0639 missingAttr = !char0.unicode() ? QStringLiteral("char") : QString(); 0640 break; 0641 0642 case Type::Detect2Chars: 0643 case Type::RangeDetect: 0644 missingAttr = !char0.unicode() && !char1.unicode() ? QStringLiteral("char and char1") 0645 : !char0.unicode() ? QStringLiteral("char") 0646 : !char1.unicode() ? QStringLiteral("char1") 0647 : QString(); 0648 break; 0649 0650 case Type::IncludeRules: 0651 missingAttr = context.name.isEmpty() ? 
QStringLiteral("context") : QString(); 0652 break; 0653 0654 case Type::DetectIdentifier: 0655 case Type::DetectSpaces: 0656 case Type::Float: 0657 case Type::HlCChar: 0658 case Type::HlCHex: 0659 case Type::HlCOct: 0660 case Type::HlCStringChar: 0661 case Type::Int: 0662 case Type::LineContinue: 0663 break; 0664 } 0665 0666 if (!missingAttr.isEmpty()) { 0667 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute:" << missingAttr; 0668 return false; 0669 } 0670 0671 return true; 0672 } 0673 }; 0674 0675 int line; 0676 // becomes false when a context (except includeRule) refers to it 0677 bool isOnlyIncluded = true; 0678 // becomes true when an includedRule refers to it with includeAttrib=true 0679 bool referencedWithIncludeAttrib = false; 0680 bool hasDynamicRule = false; 0681 QString name; 0682 QString attribute; 0683 ContextName lineEndContext; 0684 ContextName lineEmptyContext; 0685 ContextName fallthroughContext; 0686 QVector<Rule> rules; 0687 XmlBool dynamic{}; 0688 XmlBool fallthrough{}; 0689 0690 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0691 { 0692 line = xml.lineNumber(); 0693 0694 bool success = true; 0695 0696 for (auto &attr : xml.attributes()) { 0697 Parser parser{filename, xml, attr, success}; 0698 XmlBool noIndentationBasedFolding{}; 0699 0700 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(attribute, QStringLiteral("attribute")) 0701 || parser.extractString(lineEndContext.name, QStringLiteral("lineEndContext")) 0702 || parser.extractString(lineEmptyContext.name, QStringLiteral("lineEmptyContext")) 0703 || parser.extractString(fallthroughContext.name, QStringLiteral("fallthroughContext")) 0704 || parser.extractXmlBool(dynamic, QStringLiteral("dynamic")) || parser.extractXmlBool(fallthrough, QStringLiteral("fallthrough")) 0705 || parser.extractXmlBool(noIndentationBasedFolding, QStringLiteral("noIndentationBasedFolding")); 0706 0707 success = 
parser.checkIfExtracted(isExtracted); 0708 } 0709 0710 if (name.isEmpty()) { 0711 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: name"; 0712 success = false; 0713 } 0714 0715 if (attribute.isEmpty()) { 0716 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: attribute"; 0717 success = false; 0718 } 0719 0720 if (lineEndContext.name.isEmpty()) { 0721 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: lineEndContext"; 0722 success = false; 0723 } 0724 0725 return success; 0726 } 0727 }; 0728 0729 struct Version { 0730 int majorRevision; 0731 int minorRevision; 0732 0733 Version(int majorRevision = 0, int minorRevision = 0) 0734 : majorRevision(majorRevision) 0735 , minorRevision(minorRevision) 0736 { 0737 } 0738 0739 bool operator<(const Version &version) const 0740 { 0741 return majorRevision < version.majorRevision || (majorRevision == version.majorRevision && minorRevision < version.minorRevision); 0742 } 0743 }; 0744 0745 struct ItemDatas { 0746 struct Style { 0747 QString name; 0748 int line; 0749 0750 friend uint qHash(const Style &style, uint seed = 0) 0751 { 0752 return qHash(style.name, seed); 0753 } 0754 0755 friend bool operator==(const Style &style0, const Style &style1) 0756 { 0757 return style0.name == style1.name; 0758 } 0759 }; 0760 0761 QSet<Style> styleNames; 0762 0763 bool parseElement(const QString &filename, QXmlStreamReader &xml) 0764 { 0765 bool success = true; 0766 0767 QString name; 0768 QString defStyleNum; 0769 XmlBool boolean; 0770 0771 for (auto &attr : xml.attributes()) { 0772 Parser parser{filename, xml, attr, success}; 0773 0774 const bool isExtracted = parser.extractString(name, QStringLiteral("name")) || parser.extractString(defStyleNum, QStringLiteral("defStyleNum")) 0775 || parser.extractXmlBool(boolean, QStringLiteral("bold")) || parser.extractXmlBool(boolean, QStringLiteral("italic")) 0776 || parser.extractXmlBool(boolean, QStringLiteral("underline")) 
|| parser.extractXmlBool(boolean, QStringLiteral("strikeOut")) 0777 || parser.extractXmlBool(boolean, QStringLiteral("spellChecking")) || parser.checkColor(QStringLiteral("color")) 0778 || parser.checkColor(QStringLiteral("selColor")) || parser.checkColor(QStringLiteral("backgroundColor")) 0779 || parser.checkColor(QStringLiteral("selBackgroundColor")); 0780 0781 success = parser.checkIfExtracted(isExtracted); 0782 } 0783 0784 if (!name.isEmpty()) { 0785 const auto len = styleNames.size(); 0786 styleNames.insert({name, int(xml.lineNumber())}); 0787 if (len == styleNames.size()) { 0788 qWarning() << filename << "line" << xml.lineNumber() << "itemData duplicate:" << name; 0789 success = false; 0790 } 0791 } 0792 0793 return success; 0794 } 0795 }; 0796 0797 struct Definition { 0798 QMap<QString, Keywords> keywordsList; 0799 QMap<QString, Context> contexts; 0800 ItemDatas itemDatas; 0801 QString firstContextName; 0802 const Context *firstContext = nullptr; 0803 QString filename; 0804 WordDelimiters wordDelimiters; 0805 XmlBool casesensitive{}; 0806 Version kateVersion{}; 0807 QString kateVersionStr; 0808 QString languageName; 0809 QSet<const Definition *> referencedDefinitions; 0810 0811 // Parse <keywords ...> 0812 bool parseKeywords(QXmlStreamReader &xml) 0813 { 0814 wordDelimiters.append(xml.attributes().value(QStringLiteral("additionalDeliminator"))); 0815 wordDelimiters.remove(xml.attributes().value(QStringLiteral("weakDeliminator"))); 0816 return true; 0817 } 0818 }; 0819 0820 // Parse <context> 0821 void processContextElement(QXmlStreamReader &xml) 0822 { 0823 Context context; 0824 m_success = context.parseElement(m_currentDefinition->filename, xml) && m_success; 0825 if (m_currentDefinition->firstContextName.isEmpty()) { 0826 m_currentDefinition->firstContextName = context.name; 0827 } 0828 if (m_currentDefinition->contexts.contains(context.name)) { 0829 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate context:" << 
context.name; 0830 m_success = false; 0831 } 0832 m_currentContext = &*m_currentDefinition->contexts.insert(context.name, context); 0833 } 0834 0835 // Parse <list name="..."> 0836 void processListElement(QXmlStreamReader &xml) 0837 { 0838 Keywords keywords; 0839 m_success = keywords.parseElement(m_currentDefinition->filename, xml) && m_success; 0840 if (m_currentDefinition->keywordsList.contains(keywords.name)) { 0841 qWarning() << m_currentDefinition->filename << "line" << xml.lineNumber() << "duplicate list:" << keywords.name; 0842 m_success = false; 0843 } 0844 m_currentKeywords = &*m_currentDefinition->keywordsList.insert(keywords.name, keywords); 0845 } 0846 0847 const Definition *maxKateVersionDefinition(const Definition &definition, QMap<const Definition *, const Definition *> &maxVersionByDefinitions) const 0848 { 0849 auto it = maxVersionByDefinitions.find(&definition); 0850 if (it != maxVersionByDefinitions.end()) { 0851 return it.value(); 0852 } else { 0853 auto it = maxVersionByDefinitions.insert(&definition, &definition); 0854 for (const auto &referencedDef : definition.referencedDefinitions) { 0855 auto *maxDef = maxKateVersionDefinition(*referencedDef, maxVersionByDefinitions); 0856 if (it.value()->kateVersion < maxDef->kateVersion) { 0857 it.value() = maxDef; 0858 } 0859 } 0860 return it.value(); 0861 } 0862 } 0863 0864 // Initialize the referenced rules (Rule::includedRules) 0865 void resolveIncludeRules() 0866 { 0867 QSet<const Context *> usedContexts; 0868 QVector<const Context *> contexts; 0869 0870 QMutableMapIterator<QString, Definition> def(m_definitions); 0871 while (def.hasNext()) { 0872 def.next(); 0873 auto &definition = def.value(); 0874 QMutableMapIterator<QString, Context> contextIt(definition.contexts); 0875 while (contextIt.hasNext()) { 0876 contextIt.next(); 0877 auto ¤tContext = contextIt.value(); 0878 for (auto &rule : currentContext.rules) { 0879 if (rule.type != Context::Rule::Type::IncludeRules) { 0880 continue; 0881 } 0882 
0883 if (rule.context.stay) { 0884 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself"; 0885 m_success = false; 0886 continue; 0887 } 0888 0889 if (rule.context.popCount) { 0890 qWarning() << definition.filename << "line" << rule.line << "IncludeRules with #pop prefix"; 0891 m_success = false; 0892 } 0893 0894 if (!rule.context.context) { 0895 m_success = false; 0896 continue; 0897 } 0898 0899 // resolve includedRules and includedIncludeRules 0900 0901 usedContexts.clear(); 0902 usedContexts.insert(rule.context.context); 0903 contexts.clear(); 0904 contexts.append(rule.context.context); 0905 0906 for (int i = 0; i < contexts.size(); ++i) { 0907 currentContext.hasDynamicRule = contexts[i]->hasDynamicRule; 0908 for (const auto &includedRule : contexts[i]->rules) { 0909 if (includedRule.type != Context::Rule::Type::IncludeRules) { 0910 rule.includedRules.append(&includedRule); 0911 } else if (&rule == &includedRule) { 0912 qWarning() << definition.filename << "line" << rule.line << "IncludeRules refers to himself by recursivity"; 0913 m_success = false; 0914 } else { 0915 rule.includedIncludeRules.insert(&includedRule); 0916 0917 if (includedRule.includedRules.isEmpty()) { 0918 const auto *context = includedRule.context.context; 0919 if (context && !usedContexts.contains(context)) { 0920 contexts.append(context); 0921 usedContexts.insert(context); 0922 } 0923 } else { 0924 rule.includedRules.append(includedRule.includedRules); 0925 } 0926 } 0927 } 0928 } 0929 } 0930 } 0931 } 0932 } 0933 0934 //! Recursively extracts the contexts used from the first context of the definitions. 0935 //! This method detects groups of contexts which are only used among themselves. 
0936 QSet<const Context *> extractUsedContexts() const 0937 { 0938 QSet<const Context *> usedContexts; 0939 QVector<const Context *> contexts; 0940 0941 QMapIterator<QString, Definition> def(m_definitions); 0942 while (def.hasNext()) { 0943 def.next(); 0944 const auto &definition = def.value(); 0945 0946 if (definition.firstContext) { 0947 usedContexts.insert(definition.firstContext); 0948 contexts.clear(); 0949 contexts.append(definition.firstContext); 0950 0951 for (int i = 0; i < contexts.size(); ++i) { 0952 auto appendContext = [&](const Context *context) { 0953 if (context && !usedContexts.contains(context)) { 0954 contexts.append(context); 0955 usedContexts.insert(context); 0956 } 0957 }; 0958 0959 const auto *context = contexts[i]; 0960 appendContext(context->lineEndContext.context); 0961 appendContext(context->lineEmptyContext.context); 0962 appendContext(context->fallthroughContext.context); 0963 0964 for (auto &rule : context->rules) { 0965 appendContext(rule.context.context); 0966 } 0967 } 0968 } 0969 } 0970 0971 return usedContexts; 0972 } 0973 0974 struct RuleAndInclude { 0975 const Context::Rule *rule; 0976 const Context::Rule *includeRules; 0977 0978 explicit operator bool() const 0979 { 0980 return rule; 0981 } 0982 }; 0983 0984 struct IncludedRuleUnreachableBy { 0985 QVector<RuleAndInclude> unreachableBy; 0986 bool alwaysUnreachable = true; 0987 }; 0988 0989 //! 
Check contexts and rules 0990 bool checkContexts(const Definition &definition, 0991 QSet<const Keywords *> &referencedKeywords, 0992 QSet<ItemDatas::Style> &usedAttributeNames, 0993 QSet<ItemDatas::Style> &ignoredAttributeNames, 0994 const QSet<const Context *> &usedContexts, 0995 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const 0996 { 0997 bool success = true; 0998 0999 QMapIterator<QString, Context> contextIt(definition.contexts); 1000 while (contextIt.hasNext()) { 1001 contextIt.next(); 1002 1003 const auto &context = contextIt.value(); 1004 const auto &filename = definition.filename; 1005 1006 if (!usedContexts.contains(&context)) { 1007 qWarning() << filename << "line" << context.line << "unused context:" << context.name; 1008 success = false; 1009 continue; 1010 } 1011 1012 if (context.name.startsWith(QStringLiteral("#pop"))) { 1013 qWarning() << filename << "line" << context.line << "the context name must not start with '#pop':" << context.name; 1014 success = false; 1015 } 1016 1017 if (!context.attribute.isEmpty() && (!context.isOnlyIncluded || context.referencedWithIncludeAttrib)) { 1018 usedAttributeNames.insert({context.attribute, context.line}); 1019 } 1020 1021 success = checkfallthrough(definition, context) && success; 1022 success = checkUreachableRules(definition.filename, context, unreachableIncludedRules) && success; 1023 success = suggestRuleMerger(definition.filename, context) && success; 1024 1025 for (const auto &rule : context.rules) { 1026 if (!rule.attribute.isEmpty()) { 1027 if (rule.lookAhead != XmlBool::True) { 1028 usedAttributeNames.insert({rule.attribute, rule.line}); 1029 } else { 1030 ignoredAttributeNames.insert({rule.attribute, rule.line}); 1031 } 1032 } 1033 success = checkLookAhead(rule) && success; 1034 success = checkStringDetect(rule) && success; 1035 success = checkKeyword(definition, rule, referencedKeywords) && success; 1036 success = checkRegExpr(filename, rule, context) && success; 
1037 success = checkDelimiters(definition, rule) && success; 1038 } 1039 } 1040 1041 return success; 1042 } 1043 1044 //! Check that a regular expression in a RegExpr rule: 1045 //! - isValid() 1046 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. 1047 //! - dynamic=true but no place holder used? 1048 //! - is not . with lookAhead="1" 1049 //! - is not ^... without column ou firstNonSpace attribute 1050 //! - is not equivalent to DetectSpaces, DetectChar, Detect2Chars, StringDetect, DetectIdentifier, RangeDetect 1051 //! - has no unused captures 1052 //! - has no unnecessary quantifier with lookAhead 1053 bool checkRegExpr(const QString &filename, const Context::Rule &rule, const Context &context) const 1054 { 1055 if (rule.type == Context::Rule::Type::RegExpr) { 1056 const QRegularExpression regexp(rule.string); 1057 if (!checkRegularExpression(rule.filename, regexp, rule.line)) { 1058 return false; 1059 } 1060 1061 // dynamic == true and no place holder? 1062 if (rule.dynamic == XmlBool::True) { 1063 static const QRegularExpression placeHolder(QStringLiteral("%\\d+")); 1064 if (!rule.string.contains(placeHolder)) { 1065 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder"; 1066 return false; 1067 } 1068 } 1069 1070 auto reg = (rule.lookAhead == XmlBool::True) ? rule.sanitizedString : rule.string; 1071 if (rule.lookAhead == XmlBool::True) { 1072 static const QRegularExpression removeAllSuffix(QStringLiteral( 1073 R"(((?<!\\)\\(?:[DSWdsw]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4})|(?<!\\)[^])}\\]|(?=\\)\\\\)[*][?+]?$)")); 1074 reg.replace(removeAllSuffix, QString()); 1075 } 1076 1077 reg.replace(QStringLiteral("{1}"), QString()); 1078 1079 // is DetectSpaces 1080 // optional ^ then \s, [\s], [\t ], [ \t] possibly in (...) or (?:...) 
followed by *, + 1081 static const QRegularExpression isDetectSpaces( 1082 QStringLiteral(R"(^\^?(?:\((?:\?:)?)?\^?(?:\\s|\[(?:\\s| (?:\t|\\t)|(?:\t|\\t) )\])\)?(?:[*+][*+?]?|[*+])?\)?\)?$)")); 1083 if (rule.string.contains(isDetectSpaces)) { 1084 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : ""; 1085 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectSpaces / DetectChar / AnyChar" << extraMsg << ":" 1086 << rule.string; 1087 return false; 1088 } 1089 1090 #define REG_ESCAPE_CHAR R"(\\(?:[^0BDPSWbdpswoux]|x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\}|0\d\d|o\{[0-7]+\}|u[0-9a-fA-F]{4}))" 1091 #define REG_CHAR "(?:" REG_ESCAPE_CHAR "|\\[(?:" REG_ESCAPE_CHAR "|.)\\]|[^[.^])" 1092 1093 // is RangeDetect 1094 static const QRegularExpression isRange(QStringLiteral("^\\^?" REG_CHAR "(?:" 1095 "\\.\\*[?*]?" REG_CHAR "|" 1096 "\\[\\^(" REG_ESCAPE_CHAR "|.)\\]\\*[?*]?\\1" 1097 ")$")); 1098 if ((rule.lookAhead == XmlBool::True || rule.minimal == XmlBool::True || rule.string.contains(QStringLiteral(".*?")) 1099 || rule.string.contains(QStringLiteral("[^"))) 1100 && reg.contains(isRange)) { 1101 qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by RangeDetect:" << rule.string; 1102 return false; 1103 } 1104 1105 // is LineContinue 1106 static const QRegularExpression isLineContinue(QStringLiteral("^\\^?" REG_CHAR "\\$$")); 1107 if (reg.contains(isLineContinue)) { 1108 auto extra = (reg[0] == QLatin1Char('^')) ? 
"with column=\"0\"" : ""; 1109 qWarning() << filename << "line" << rule.line << "RegExpr should be replaced by LineContinue:" << rule.string << extra; 1110 return false; 1111 } 1112 1113 // replace \c, \xhhh, \x{hhh...}, \0dd, \o{ddd}, \uhhhh, with _ 1114 static const QRegularExpression sanitize1(QStringLiteral(REG_ESCAPE_CHAR)); 1115 reg.replace(sanitize1, QStringLiteral("_")); 1116 1117 #undef REG_CHAR 1118 #undef REG_ESCAPE_CHAR 1119 1120 // use minimal or lazy operator 1121 static const QRegularExpression isMinimal(QStringLiteral("(?![.][*+?][$]?[)]*$)[.][*+?][^?+]")); 1122 static const QRegularExpression hasNotGreedy(QStringLiteral("[*+?][?+]")); 1123 1124 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(isMinimal) && !reg.contains(hasNotGreedy) 1125 && (!rule.context.context || !rule.context.context->hasDynamicRule || regexp.captureCount() == 0) 1126 && (reg.back() != QLatin1Char('$') || reg.contains(QLatin1Char('|')))) { 1127 qWarning() << filename << "line" << rule.line 1128 << "RegExpr should be have minimal=\"1\" or use lazy operator (i.g, '.*' -> '.*?'):" << rule.string; 1129 return false; 1130 } 1131 1132 // replace [:...:] with ___ 1133 static const QRegularExpression sanitize2(QStringLiteral(R"(\[:\w+:\])")); 1134 reg.replace(sanitize2, QStringLiteral("___")); 1135 1136 // replace [ccc...], [special] with ... 
1137 static const QRegularExpression sanitize3(QStringLiteral(R"(\[(?:\^\]?[^]]*|\]?[^]\\]*?\\.[^]]*|\][^]]{2,}|[^]]{3,})\]|(\[\]?[^]]*\]))")); 1138 reg.replace(sanitize3, QStringLiteral("...\\1")); 1139 1140 // replace [c] with _ 1141 static const QRegularExpression sanitize4(QStringLiteral(R"(\[.\])")); 1142 reg.replace(sanitize4, QStringLiteral("_")); 1143 1144 const int len = reg.size(); 1145 // replace [cC] with _ 1146 static const QRegularExpression toInsensitive(QStringLiteral(R"(\[(?:([^]])\1)\])")); 1147 reg = reg.toUpper(); 1148 reg.replace(toInsensitive, QString()); 1149 1150 // is StringDetect 1151 // ignore (?:, ) and {n} 1152 static const QRegularExpression isStringDetect(QStringLiteral(R"(^\^?(?:[^|\\?*+$^[{(.]|{(?!\d+,\d*}|,\d+})|\(\?:)+$)")); 1153 if (reg.contains(isStringDetect)) { 1154 char const *extraMsg = rule.string.contains(QLatin1Char('^')) ? "+ column=\"0\" or firstNonSpace=\"1\"" : ""; 1155 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by StringDetect / Detect2Chars / DetectChar" << extraMsg 1156 << ":" << rule.string; 1157 if (len != reg.size()) { 1158 qWarning() << rule.filename << "line" << rule.line << "insensitive=\"1\" missing:" << rule.string; 1159 } 1160 return false; 1161 } 1162 1163 // column="0" or firstNonSpace="1" 1164 if (rule.column == -1 && rule.firstNonSpace != XmlBool::True) { 1165 // ^ without | 1166 // (^sas*) -> ok 1167 // (^sa|s*) -> ko 1168 // (^(sa|s*)) -> ok 1169 auto first = std::as_const(reg).begin(); 1170 auto last = std::as_const(reg).end(); 1171 int depth = 0; 1172 1173 while (QLatin1Char('(') == *first) { 1174 ++depth; 1175 ++first; 1176 if (QLatin1Char('?') == *first || QLatin1Char(':') == first[1]) { 1177 first += 2; 1178 } 1179 } 1180 1181 if (QLatin1Char('^') == *first) { 1182 const int bolDepth = depth; 1183 bool replace = true; 1184 1185 while (++first != last) { 1186 if (QLatin1Char('(') == *first) { 1187 ++depth; 1188 } else if (QLatin1Char(')') == *first) { 1189 
--depth; 1190 if (depth < bolDepth) { 1191 // (^a)? === (^a|) -> ko 1192 if (first + 1 != last && QStringLiteral("*?").contains(first[1])) { 1193 replace = false; 1194 break; 1195 } 1196 } 1197 } else if (QLatin1Char('|') == *first) { 1198 // ignore '|' within subgroup 1199 if (depth <= bolDepth) { 1200 replace = false; 1201 break; 1202 } 1203 } 1204 } 1205 1206 if (replace) { 1207 qWarning() << rule.filename << "line" << rule.line << "column=\"0\" or firstNonSpace=\"1\" missing with RegExpr:" << rule.string; 1208 return false; 1209 } 1210 } 1211 } 1212 1213 // add ^ with column=0 1214 if (rule.column == 0 && !rule.isDotRegex) { 1215 bool hasStartOfLine = false; 1216 auto first = std::as_const(reg).begin(); 1217 auto last = std::as_const(reg).end(); 1218 for (; first != last; ++first) { 1219 if (*first == QLatin1Char('^')) { 1220 hasStartOfLine = true; 1221 break; 1222 } else if (*first == QLatin1Char('(')) { 1223 if (last - first >= 3 && first[1] == QLatin1Char('?') && first[2] == QLatin1Char(':')) { 1224 first += 2; 1225 } 1226 } else { 1227 break; 1228 } 1229 } 1230 1231 if (!hasStartOfLine) { 1232 qWarning() << rule.filename << "line" << rule.line 1233 << "start of line missing in the pattern with column=\"0\" (i.e. 
abc -> ^abc):" << rule.string; 1234 return false; 1235 } 1236 } 1237 1238 bool useCapture = false; 1239 1240 // detection of unnecessary capture 1241 if (regexp.captureCount()) { 1242 auto maximalCapture = [](const QString(&referenceNames)[9], const QString &s) { 1243 int maxCapture = 9; 1244 while (maxCapture && !s.contains(referenceNames[maxCapture - 1])) { 1245 --maxCapture; 1246 } 1247 return maxCapture; 1248 }; 1249 1250 int maxCaptureUsed = 0; 1251 // maximal dynamic reference 1252 if (rule.context.context && !rule.context.stay) { 1253 for (const auto &nextRule : rule.context.context->rules) { 1254 if (nextRule.dynamic == XmlBool::True) { 1255 static const QString cap[]{ 1256 QStringLiteral("%1"), 1257 QStringLiteral("%2"), 1258 QStringLiteral("%3"), 1259 QStringLiteral("%4"), 1260 QStringLiteral("%5"), 1261 QStringLiteral("%6"), 1262 QStringLiteral("%7"), 1263 QStringLiteral("%8"), 1264 QStringLiteral("%9"), 1265 }; 1266 int maxDynamicCapture = maximalCapture(cap, nextRule.string); 1267 maxCaptureUsed = std::max(maxCaptureUsed, maxDynamicCapture); 1268 } 1269 } 1270 } 1271 1272 static const QString num1[]{ 1273 QStringLiteral("\\1"), 1274 QStringLiteral("\\2"), 1275 QStringLiteral("\\3"), 1276 QStringLiteral("\\4"), 1277 QStringLiteral("\\5"), 1278 QStringLiteral("\\6"), 1279 QStringLiteral("\\7"), 1280 QStringLiteral("\\8"), 1281 QStringLiteral("\\9"), 1282 }; 1283 static const QString num2[]{ 1284 QStringLiteral("\\g1"), 1285 QStringLiteral("\\g2"), 1286 QStringLiteral("\\g3"), 1287 QStringLiteral("\\g4"), 1288 QStringLiteral("\\g5"), 1289 QStringLiteral("\\g6"), 1290 QStringLiteral("\\g7"), 1291 QStringLiteral("\\g8"), 1292 QStringLiteral("\\g9"), 1293 }; 1294 const int maxBackReference = std::max(maximalCapture(num1, rule.string), maximalCapture(num1, rule.string)); 1295 1296 const int maxCapture = std::max(maxCaptureUsed, maxBackReference); 1297 1298 if (maxCapture && regexp.captureCount() > maxCapture) { 1299 qWarning() << rule.filename << "line" << 
rule.line << "RegExpr with" << regexp.captureCount() << "captures but only" << maxCapture 1300 << "are used. Please, replace '(...)' with '(?:...)':" << rule.string; 1301 return false; 1302 } 1303 1304 useCapture = maxCapture; 1305 } 1306 1307 if (!useCapture) { 1308 // is DetectIdentifier 1309 static const QRegularExpression isInsensitiveDetectIdentifier( 1310 QStringLiteral(R"(^(\((\?:)?)?\[((a-z|_){2}|(A-Z|_){2})\]([+][*?]?)?\[((0-9|a-z|_){3}|(0-9|A-Z|_){3})\][*][*?]?(\))?$)")); 1311 static const QRegularExpression isSensitiveDetectIdentifier( 1312 QStringLiteral(R"(^(\((\?:)?)?\[(a-z|A-Z|_){3}\]([+][*?]?)?\[(0-9|a-z|A-Z|_){4}\][*][*?]?(\))?$)")); 1313 auto &isDetectIdentifier = (rule.insensitive == XmlBool::True) ? isInsensitiveDetectIdentifier : isSensitiveDetectIdentifier; 1314 if (rule.string.contains(isDetectIdentifier)) { 1315 qWarning() << rule.filename << "line" << rule.line << "RegExpr should be replaced by DetectIdentifier:" << rule.string; 1316 return false; 1317 } 1318 } 1319 1320 if (rule.isDotRegex) { 1321 // search next rule with same column or firstNonSpace 1322 int i = &rule - context.rules.data() + 1; 1323 const bool hasColumn = (rule.column != -1); 1324 const bool hasFirstNonSpace = (rule.firstNonSpace == XmlBool::True); 1325 const bool isSpecial = (hasColumn || hasFirstNonSpace); 1326 for (; i < context.rules.size(); ++i) { 1327 auto &rule2 = context.rules[i]; 1328 if (rule2.type == Context::Rule::Type::IncludeRules && isSpecial) { 1329 i = context.rules.size(); 1330 break; 1331 } 1332 1333 const bool hasColumn2 = (rule2.column != -1); 1334 const bool hasFirstNonSpace2 = (rule2.firstNonSpace == XmlBool::True); 1335 if ((!isSpecial && !hasColumn2 && !hasFirstNonSpace2) || (hasColumn && rule.column == rule2.column) 1336 || (hasFirstNonSpace && hasFirstNonSpace2)) { 1337 break; 1338 } 1339 } 1340 1341 auto ruleFilename = (filename == rule.filename) ? 
QString() : QStringLiteral("in ") + rule.filename; 1342 if (i == context.rules.size()) { 1343 if (rule.lookAhead == XmlBool::True && rule.firstNonSpace != XmlBool::True && rule.column == -1 && rule.beginRegion.isEmpty() 1344 && rule.endRegion.isEmpty() && !useCapture) { 1345 qWarning() << filename << "context line" << context.line << ": RegExpr line" << rule.line << ruleFilename 1346 << "should be replaced by fallthroughContext:" << rule.string; 1347 } 1348 } else { 1349 auto &nextRule = context.rules[i]; 1350 auto nextRuleFilename = (filename == nextRule.filename) ? QString() : QStringLiteral("in ") + nextRule.filename; 1351 qWarning() << filename << "context line" << context.line << "contains unreachable element line" << nextRule.line << nextRuleFilename 1352 << "because a dot RegExpr is used line" << rule.line << ruleFilename; 1353 } 1354 1355 // unnecessary quantifier 1356 static const QRegularExpression unnecessaryQuantifier1(QStringLiteral(R"([*+?]([.][*+?]{0,2})?$)")); 1357 static const QRegularExpression unnecessaryQuantifier2(QStringLiteral(R"([*+?]([.][*+?]{0,2})?[)]*$)")); 1358 auto &unnecessaryQuantifier = useCapture ? unnecessaryQuantifier1 : unnecessaryQuantifier2; 1359 if (rule.lookAhead == XmlBool::True && rule.minimal != XmlBool::True && reg.contains(unnecessaryQuantifier)) { 1360 qWarning() << filename << "line" << rule.line 1361 << "Last quantifier is not necessary (i.g., 'xyz*' -> 'xy', 'xyz+.' 
-> 'xyz.'):" << rule.string; 1362 return false; 1363 } 1364 } 1365 } 1366 1367 return true; 1368 } 1369 1370 // Parse and check <emptyLine> 1371 bool parseEmptyLine(const QString &filename, QXmlStreamReader &xml) 1372 { 1373 bool success = true; 1374 1375 QString pattern; 1376 XmlBool casesensitive{}; 1377 1378 for (auto &attr : xml.attributes()) { 1379 Parser parser{filename, xml, attr, success}; 1380 1381 const bool isExtracted = 1382 parser.extractString(pattern, QStringLiteral("regexpr")) || parser.extractXmlBool(casesensitive, QStringLiteral("casesensitive")); 1383 1384 success = parser.checkIfExtracted(isExtracted); 1385 } 1386 1387 if (pattern.isEmpty()) { 1388 qWarning() << filename << "line" << xml.lineNumber() << "missing attribute: regexpr"; 1389 success = false; 1390 } else { 1391 success = checkRegularExpression(filename, QRegularExpression(pattern), xml.lineNumber()); 1392 } 1393 1394 return success; 1395 } 1396 1397 //! Check that a regular expression: 1398 //! - isValid() 1399 //! - character ranges such as [A-Z] are valid and not accidentally e.g. [A-z]. 1400 bool checkRegularExpression(const QString &filename, const QRegularExpression ®exp, int line) const 1401 { 1402 const auto pattern = regexp.pattern(); 1403 1404 // validate regexp 1405 if (!regexp.isValid()) { 1406 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem:" << regexp.errorString() << "at offset" 1407 << regexp.patternErrorOffset(); 1408 return false; 1409 } 1410 1411 // catch possible case typos: [A-z] or [a-Z] 1412 const int azOffset = std::max(pattern.indexOf(QStringLiteral("A-z")), pattern.indexOf(QStringLiteral("a-Z"))); 1413 if (azOffset >= 0) { 1414 qWarning() << filename << "line" << line << "broken regex:" << pattern << "problem: [a-Z] or [A-z] at offset" << azOffset; 1415 return false; 1416 } 1417 1418 return true; 1419 } 1420 1421 //! Search for rules with lookAhead="true" and context="#stay". 1422 //! This would cause an infinite loop. 
1423 bool checkfallthrough(const Definition &definition, const Context &context) const 1424 { 1425 bool success = true; 1426 1427 if (!context.fallthroughContext.name.isEmpty()) { 1428 if (context.fallthroughContext.stay) { 1429 qWarning() << definition.filename << "line" << context.line << "possible infinite loop due to fallthroughContext=\"#stay\" in context " 1430 << context.name; 1431 success = false; 1432 } 1433 1434 const bool mandatoryFallthroughAttribute = definition.kateVersion < Version{5, 62}; 1435 if (context.fallthrough == XmlBool::True && !mandatoryFallthroughAttribute) { 1436 qWarning() << definition.filename << "line" << context.line << "fallthrough attribute is unnecessary with kateversion >= 5.62 in context" 1437 << context.name; 1438 success = false; 1439 } else if (context.fallthrough != XmlBool::True && mandatoryFallthroughAttribute) { 1440 qWarning() << definition.filename << "line" << context.line 1441 << "fallthroughContext attribute without fallthrough=\"1\" attribute is only valid with kateversion >= 5.62 in context" 1442 << context.name; 1443 success = false; 1444 } 1445 } 1446 1447 return success; 1448 } 1449 1450 //! Search for additionalDeliminator/weakDeliminator which has no effect. 1451 bool checkDelimiters(const Definition &definition, const Context::Rule &rule) const 1452 { 1453 if (rule.additionalDeliminator.isEmpty() && rule.weakDeliminator.isEmpty()) { 1454 return true; 1455 } 1456 1457 bool success = true; 1458 1459 if (definition.kateVersion < Version{5, 79}) { 1460 qWarning() << definition.filename << "line" << rule.line 1461 << "additionalDeliminator and weakDeliminator are only available since version \"5.79\". 
Please, increase kateversion."; 1462 success = false; 1463 } 1464 1465 for (QChar c : rule.additionalDeliminator) { 1466 if (!definition.wordDelimiters.contains(c)) { 1467 return success; 1468 } 1469 } 1470 1471 for (QChar c : rule.weakDeliminator) { 1472 if (definition.wordDelimiters.contains(c)) { 1473 return success; 1474 } 1475 } 1476 1477 qWarning() << rule.filename << "line" << rule.line << "unnecessary use of additionalDeliminator and/or weakDeliminator" << rule.string; 1478 return false; 1479 } 1480 1481 //! Search for rules with lookAhead="true" and context="#stay". 1482 //! This would cause an infinite loop. 1483 bool checkKeyword(const Definition &definition, const Context::Rule &rule, QSet<const Keywords *> &referencedKeywords) const 1484 { 1485 if (rule.type == Context::Rule::Type::keyword) { 1486 auto it = definition.keywordsList.find(rule.string); 1487 if (it != definition.keywordsList.end()) { 1488 referencedKeywords.insert(&*it); 1489 } else { 1490 qWarning() << rule.filename << "line" << rule.line << "reference of non-existing keyword list:" << rule.string; 1491 return false; 1492 } 1493 } 1494 return true; 1495 } 1496 1497 //! Search for rules with lookAhead="true" and context="#stay". 1498 //! This would cause an infinite loop. 1499 bool checkLookAhead(const Context::Rule &rule) const 1500 { 1501 if (rule.lookAhead == XmlBool::True && rule.context.stay) { 1502 qWarning() << rule.filename << "line" << rule.line << "infinite loop: lookAhead with context #stay"; 1503 } 1504 return true; 1505 } 1506 1507 //! Check that StringDetect contains more that 2 characters 1508 //! Fix with following command: 1509 //! \code 1510 //! sed -E 1511 //! '/StringDetect/{/dynamic="(1|true)|insensitive="(1|true)/!{s/StringDetect(.*)String="(.|<|>|"|&)(.|<|>|"|&)"/Detect2Chars\1char="\2" 1512 //! char1="\3"/;t;s/StringDetect(.*)String="(.|<|>|"|&)"/DetectChar\1char="\2"/}}' -i file.xml... 1513 //! 
\endcode 1514 bool checkStringDetect(const Context::Rule &rule) const 1515 { 1516 if (rule.type == Context::Rule::Type::StringDetect) { 1517 // dynamic == true and no place holder? 1518 if (rule.dynamic == XmlBool::True) { 1519 static const QRegularExpression placeHolder(QStringLiteral("%\\d+")); 1520 if (!rule.string.contains(placeHolder)) { 1521 qWarning() << rule.filename << "line" << rule.line << "broken regex:" << rule.string << "problem: dynamic=true but no %\\d+ placeholder"; 1522 return false; 1523 } 1524 } 1525 } 1526 return true; 1527 } 1528 1529 //! Check \<include> and delimiter in a keyword list 1530 bool checkKeywordsList(const Definition &definition, QSet<const Keywords *> &referencedKeywords) const 1531 { 1532 bool success = true; 1533 1534 bool includeNotSupport = (definition.kateVersion < Version{5, 53}); 1535 QMapIterator<QString, Keywords> keywordsIt(definition.keywordsList); 1536 while (keywordsIt.hasNext()) { 1537 keywordsIt.next(); 1538 1539 for (const auto &include : keywordsIt.value().items.includes) { 1540 if (includeNotSupport) { 1541 qWarning() << definition.filename << "line" << include.line 1542 << "<include> is only available since version \"5.53\". Please, increase kateversion."; 1543 success = false; 1544 } 1545 success = checkKeywordInclude(definition, include, referencedKeywords) && success; 1546 } 1547 1548 // Check that keyword list items do not have deliminator character 1549 #if 0 1550 for (const auto& keyword : keywordsIt.value().items.keywords) { 1551 for (QChar c : keyword.content) { 1552 if (definition.wordDelimiters.contains(c)) { 1553 qWarning() << definition.filename << "line" << keyword.line << "keyword with delimiter:" << c << "in" << keyword.content; 1554 success = false; 1555 } 1556 } 1557 } 1558 #endif 1559 } 1560 1561 return success; 1562 } 1563 1564 //! Search for non-existing keyword include. 
1565 bool checkKeywordInclude(const Definition &definition, const Keywords::Items::Item &include, QSet<const Keywords *> &referencedKeywords) const 1566 { 1567 bool containsKeywordName = true; 1568 int const idx = include.content.indexOf(QStringLiteral("##")); 1569 if (idx == -1) { 1570 auto it = definition.keywordsList.find(include.content); 1571 containsKeywordName = (it != definition.keywordsList.end()); 1572 if (containsKeywordName) { 1573 referencedKeywords.insert(&*it); 1574 } 1575 } else { 1576 auto defName = include.content.mid(idx + 2); 1577 auto listName = include.content.left(idx); 1578 auto it = m_definitions.find(defName); 1579 if (it == m_definitions.end()) { 1580 qWarning() << definition.filename << "line" << include.line << "unknown definition in" << include.content; 1581 return false; 1582 } 1583 containsKeywordName = it->keywordsList.contains(listName); 1584 } 1585 1586 if (!containsKeywordName) { 1587 qWarning() << definition.filename << "line" << include.line << "unknown keyword name in" << include.content; 1588 } 1589 1590 return containsKeywordName; 1591 } 1592 1593 //! Check if a rule is hidden by another 1594 //! - rule hidden by DetectChar or AnyChar 1595 //! - DetectSpaces, AnyChar, Int, Float with all their characters hidden by DetectChar or AnyChar 1596 //! - StringDetect, WordDetect, RegExpr with as prefix Detect2Chars or other strings 1597 //! - duplicate rule (Int, Float, keyword with same String, etc) 1598 //! 
- Rule hidden by a dot regex 1599 bool checkUreachableRules(const QString &filename, 1600 const Context &context, 1601 QMap<const Context::Rule *, IncludedRuleUnreachableBy> &unreachableIncludedRules) const 1602 { 1603 if (context.isOnlyIncluded) { 1604 return true; 1605 } 1606 1607 struct Rule4 { 1608 RuleAndInclude setRule(const Context::Rule &rule, const Context::Rule *includeRules = nullptr) 1609 { 1610 auto set = [&](RuleAndInclude &ruleAndInclude) { 1611 auto old = ruleAndInclude; 1612 ruleAndInclude = {&rule, includeRules}; 1613 return old; 1614 }; 1615 1616 if (rule.firstNonSpace == XmlBool::True) { 1617 return set(firstNonSpace); 1618 } else if (rule.column == 0) { 1619 return set(column0); 1620 } else if (rule.column > 0) { 1621 return set(columnGreaterThan0[rule.column]); 1622 } else { 1623 return set(normal); 1624 } 1625 } 1626 1627 private: 1628 RuleAndInclude normal; 1629 RuleAndInclude column0; 1630 QMap<int, RuleAndInclude> columnGreaterThan0; 1631 RuleAndInclude firstNonSpace; 1632 }; 1633 1634 // Associate QChar with RuleAndInclude 1635 struct CharTable { 1636 /// Search RuleAndInclude associated with @p c. 1637 RuleAndInclude find(QChar c) const 1638 { 1639 if (c.unicode() < 128) { 1640 return m_asciiMap[c.unicode()]; 1641 } 1642 auto it = m_utf8Map.find(c); 1643 return it == m_utf8Map.end() ? RuleAndInclude{nullptr, nullptr} : it.value(); 1644 } 1645 1646 /// Search RuleAndInclude associated with the characters of @p s. 1647 /// \return an empty QVector when at least one character is not found. 1648 QVector<RuleAndInclude> find(QStringView s) const 1649 { 1650 QVector<RuleAndInclude> result; 1651 1652 for (QChar c : s) { 1653 if (!find(c)) { 1654 return result; 1655 } 1656 } 1657 1658 for (QChar c : s) { 1659 result.append(find(c)); 1660 } 1661 1662 return result; 1663 } 1664 1665 /// Associates @p c with a rule. 
1666 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1667 { 1668 if (c.unicode() < 128) { 1669 m_asciiMap[c.unicode()] = {&rule, includeRule}; 1670 } else { 1671 m_utf8Map[c] = {&rule, includeRule}; 1672 } 1673 } 1674 1675 /// Associates each character of @p s with a rule. 1676 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1677 { 1678 for (QChar c : s) { 1679 append(c, rule, includeRule); 1680 } 1681 } 1682 1683 private: 1684 RuleAndInclude m_asciiMap[127]{}; 1685 QMap<QChar, RuleAndInclude> m_utf8Map; 1686 }; 1687 1688 struct Char4Tables { 1689 CharTable chars; 1690 CharTable charsColumn0; 1691 QMap<int, CharTable> charsColumnGreaterThan0; 1692 CharTable charsFirstNonSpace; 1693 }; 1694 1695 // View on Char4Tables members 1696 struct CharTableArray { 1697 // Append Char4Tables members that satisfies firstNonSpace and column. 1698 // Char4Tables::char is always added. 1699 CharTableArray(Char4Tables &tables, const Context::Rule &rule) 1700 { 1701 if (rule.firstNonSpace == XmlBool::True) { 1702 appendTable(tables.charsFirstNonSpace); 1703 } 1704 1705 if (rule.column == 0) { 1706 appendTable(tables.charsColumn0); 1707 } else if (rule.column > 0) { 1708 appendTable(tables.charsColumnGreaterThan0[rule.column]); 1709 } 1710 1711 appendTable(tables.chars); 1712 } 1713 1714 // Removes Char4Tables::chars when the rule contains firstNonSpace or column 1715 void removeNonSpecialWhenSpecial() 1716 { 1717 if (m_size > 1) { 1718 --m_size; 1719 } 1720 } 1721 1722 /// Search RuleAndInclude associated with @p c. 1723 RuleAndInclude find(QChar c) const 1724 { 1725 for (int i = 0; i < m_size; ++i) { 1726 if (auto ruleAndInclude = m_charTables[i]->find(c)) { 1727 return ruleAndInclude; 1728 } 1729 } 1730 return RuleAndInclude{nullptr, nullptr}; 1731 } 1732 1733 /// Search RuleAndInclude associated with the characters of @p s. 
1734 /// \return an empty QVector when at least one character is not found. 1735 QVector<RuleAndInclude> find(QStringView s) const 1736 { 1737 for (int i = 0; i < m_size; ++i) { 1738 auto result = m_charTables[i]->find(s); 1739 if (result.size()) { 1740 while (++i < m_size) { 1741 result.append(m_charTables[i]->find(s)); 1742 } 1743 return result; 1744 } 1745 } 1746 return QVector<RuleAndInclude>(); 1747 } 1748 1749 /// Associates @p c with a rule. 1750 void append(QChar c, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1751 { 1752 for (int i = 0; i < m_size; ++i) { 1753 m_charTables[i]->append(c, rule, includeRule); 1754 } 1755 } 1756 1757 /// Associates each character of @p s with a rule. 1758 void append(QStringView s, const Context::Rule &rule, const Context::Rule *includeRule = nullptr) 1759 { 1760 for (int i = 0; i < m_size; ++i) { 1761 m_charTables[i]->append(s, rule, includeRule); 1762 } 1763 } 1764 1765 private: 1766 void appendTable(CharTable &t) 1767 { 1768 m_charTables[m_size] = &t; 1769 ++m_size; 1770 } 1771 1772 CharTable *m_charTables[3]; 1773 int m_size = 0; 1774 }; 1775 1776 struct ObservableRule { 1777 const Context::Rule *rule; 1778 const Context::Rule *includeRules; 1779 1780 bool hasResolvedIncludeRules() const 1781 { 1782 return rule == includeRules; 1783 } 1784 }; 1785 1786 // Iterates over all the rules, including those in includedRules 1787 struct RuleIterator { 1788 RuleIterator(const QVector<ObservableRule> &rules, const ObservableRule &endRule) 1789 : m_end(&endRule - rules.data()) 1790 , m_rules(rules) 1791 { 1792 } 1793 1794 /// \return next rule or nullptr 1795 const Context::Rule *next() 1796 { 1797 // if in includedRules 1798 if (m_includedRules) { 1799 ++m_i2; 1800 if (m_i2 != m_includedRules->size()) { 1801 return (*m_includedRules)[m_i2]; 1802 } 1803 ++m_i; 1804 m_includedRules = nullptr; 1805 } 1806 1807 // if is a includedRules 1808 while (m_i < m_end && m_rules[m_i].rule->type == 
Context::Rule::Type::IncludeRules) { 1809 if (!m_rules[m_i].includeRules && m_rules[m_i].rule->includedRules.size()) { 1810 m_i2 = 0; 1811 m_includedRules = &m_rules[m_i].rule->includedRules; 1812 return (*m_includedRules)[m_i2]; 1813 } 1814 ++m_i; 1815 } 1816 1817 if (m_i < m_end) { 1818 ++m_i; 1819 return m_rules[m_i - 1].rule; 1820 } 1821 1822 return nullptr; 1823 } 1824 1825 /// \return current IncludeRules or nullptr 1826 const Context::Rule *currentIncludeRules() const 1827 { 1828 return m_includedRules ? m_rules[m_i].rule : m_rules[m_i].includeRules; 1829 } 1830 1831 private: 1832 int m_i = 0; 1833 int m_i2; 1834 int m_end; 1835 const QVector<ObservableRule> &m_rules; 1836 const QVector<const Context::Rule *> *m_includedRules = nullptr; 1837 }; 1838 1839 // Dot regex container that satisfies firstNonSpace and column. 1840 struct DotRegex { 1841 /// Append a dot regex rule. 1842 void append(const Context::Rule &rule, const Context::Rule *includedRule) 1843 { 1844 auto array = extractDotRegexes(rule); 1845 if (array[0]) { 1846 *array[0] = {&rule, includedRule}; 1847 } 1848 if (array[1]) { 1849 *array[1] = {&rule, includedRule}; 1850 } 1851 } 1852 1853 /// Search dot regex which hides @p rule 1854 RuleAndInclude find(const Context::Rule &rule) 1855 { 1856 auto array = extractDotRegexes(rule); 1857 if (array[0]) { 1858 return *array[0]; 1859 } 1860 if (array[1]) { 1861 return *array[1]; 1862 } 1863 return RuleAndInclude{}; 1864 } 1865 1866 private: 1867 using Array = std::array<RuleAndInclude *, 2>; 1868 1869 Array extractDotRegexes(const Context::Rule &rule) 1870 { 1871 Array ret{}; 1872 1873 if (rule.firstNonSpace != XmlBool::True && rule.column == -1) { 1874 ret[0] = &dotRegex; 1875 } else { 1876 if (rule.firstNonSpace == XmlBool::True) { 1877 ret[0] = &dotRegexFirstNonSpace; 1878 } 1879 1880 if (rule.column == 0) { 1881 ret[1] = &dotRegexColumn0; 1882 } else if (rule.column > 0) { 1883 ret[1] = &dotRegexColumnGreaterThan0[rule.column]; 1884 } 1885 } 1886 
1887 return ret; 1888 } 1889 1890 RuleAndInclude dotRegex{}; 1891 RuleAndInclude dotRegexColumn0{}; 1892 QMap<int, RuleAndInclude> dotRegexColumnGreaterThan0{}; 1893 RuleAndInclude dotRegexFirstNonSpace{}; 1894 }; 1895 1896 bool success = true; 1897 1898 // characters of DetectChar/AnyChar 1899 Char4Tables detectChars; 1900 // characters of dynamic DetectChar 1901 Char4Tables dynamicDetectChars; 1902 // characters of LineContinue 1903 Char4Tables lineContinueChars; 1904 1905 Rule4 intRule{}; 1906 Rule4 floatRule{}; 1907 Rule4 hlCCharRule{}; 1908 Rule4 hlCOctRule{}; 1909 Rule4 hlCHexRule{}; 1910 Rule4 hlCStringCharRule{}; 1911 Rule4 detectIdentifierRule{}; 1912 1913 // Contains includedRules and included includedRules 1914 QMap<Context const *, RuleAndInclude> includeContexts; 1915 1916 DotRegex dotRegex; 1917 1918 QVector<ObservableRule> observedRules; 1919 observedRules.reserve(context.rules.size()); 1920 for (const Context::Rule &rule : context.rules) { 1921 const Context::Rule *includeRule = nullptr; 1922 if (rule.type == Context::Rule::Type::IncludeRules) { 1923 auto *context = rule.context.context; 1924 if (context && context->isOnlyIncluded) { 1925 includeRule = &rule; 1926 } 1927 } 1928 1929 observedRules.push_back({&rule, includeRule}); 1930 if (includeRule) { 1931 for (const Context::Rule *rule2 : rule.includedRules) { 1932 observedRules.push_back({rule2, includeRule}); 1933 } 1934 } 1935 } 1936 1937 for (auto &observedRule : observedRules) { 1938 const Context::Rule &rule = *observedRule.rule; 1939 bool isUnreachable = false; 1940 QVector<RuleAndInclude> unreachableBy; 1941 1942 // declare rule as unreachable if ruleAndInclude is not empty 1943 auto updateUnreachable1 = [&](RuleAndInclude ruleAndInclude) { 1944 if (ruleAndInclude) { 1945 isUnreachable = true; 1946 unreachableBy.append(ruleAndInclude); 1947 } 1948 }; 1949 1950 // declare rule as unreachable if ruleAndIncludes is not empty 1951 auto updateUnreachable2 = [&](const QVector<RuleAndInclude> 
&ruleAndIncludes) { 1952 if (!ruleAndIncludes.isEmpty()) { 1953 isUnreachable = true; 1954 unreachableBy.append(ruleAndIncludes); 1955 } 1956 }; 1957 1958 // check if rule2.firstNonSpace/column is compatible with those of rule 1959 auto isCompatible = [&rule](Context::Rule const &rule2) { 1960 return (rule2.firstNonSpace != XmlBool::True && rule2.column == -1) || (rule.column == rule2.column && rule.column != -1) 1961 || (rule.firstNonSpace == rule2.firstNonSpace && rule.firstNonSpace == XmlBool::True); 1962 }; 1963 1964 updateUnreachable1(dotRegex.find(rule)); 1965 1966 switch (rule.type) { 1967 // checks if hidden by DetectChar/AnyChar 1968 // then add the characters to detectChars 1969 case Context::Rule::Type::AnyChar: { 1970 auto tables = CharTableArray(detectChars, rule); 1971 updateUnreachable2(tables.find(rule.string)); 1972 tables.removeNonSpecialWhenSpecial(); 1973 tables.append(rule.string, rule); 1974 break; 1975 } 1976 1977 // check if is hidden by DetectChar/AnyChar 1978 // then add the characters to detectChars or dynamicDetectChars 1979 case Context::Rule::Type::DetectChar: { 1980 auto &chars4 = (rule.dynamic != XmlBool::True) ? 
detectChars : dynamicDetectChars; 1981 auto tables = CharTableArray(chars4, rule); 1982 updateUnreachable1(tables.find(rule.char0)); 1983 tables.removeNonSpecialWhenSpecial(); 1984 tables.append(rule.char0, rule); 1985 break; 1986 } 1987 1988 // check if hidden by DetectChar/AnyChar 1989 // then add spaces characters to detectChars 1990 case Context::Rule::Type::DetectSpaces: { 1991 auto tables = CharTableArray(detectChars, rule); 1992 updateUnreachable2(tables.find(QStringLiteral(" \t"))); 1993 tables.removeNonSpecialWhenSpecial(); 1994 tables.append(QLatin1Char(' '), rule); 1995 tables.append(QLatin1Char('\t'), rule); 1996 break; 1997 } 1998 1999 // check if hidden by DetectChar/AnyChar 2000 case Context::Rule::Type::HlCChar: 2001 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\''))); 2002 updateUnreachable1(hlCCharRule.setRule(rule)); 2003 break; 2004 2005 // check if hidden by DetectChar/AnyChar 2006 case Context::Rule::Type::HlCHex: 2007 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0'))); 2008 updateUnreachable1(hlCHexRule.setRule(rule)); 2009 break; 2010 2011 // check if hidden by DetectChar/AnyChar 2012 case Context::Rule::Type::HlCOct: 2013 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('0'))); 2014 updateUnreachable1(hlCOctRule.setRule(rule)); 2015 break; 2016 2017 // check if hidden by DetectChar/AnyChar 2018 case Context::Rule::Type::HlCStringChar: 2019 updateUnreachable1(CharTableArray(detectChars, rule).find(QLatin1Char('\\'))); 2020 updateUnreachable1(hlCStringCharRule.setRule(rule)); 2021 break; 2022 2023 // check if hidden by DetectChar/AnyChar 2024 case Context::Rule::Type::Int: 2025 updateUnreachable2(CharTableArray(detectChars, rule).find(QStringLiteral("0123456789"))); 2026 updateUnreachable1(intRule.setRule(rule)); 2027 break; 2028 2029 // check if hidden by DetectChar/AnyChar 2030 case Context::Rule::Type::Float: 2031 updateUnreachable2(CharTableArray(detectChars, 
rule).find(QStringLiteral("0123456789."))); 2032 updateUnreachable1(floatRule.setRule(rule)); 2033 break; 2034 2035 // check if hidden by another DetectIdentifier rule 2036 case Context::Rule::Type::DetectIdentifier: 2037 updateUnreachable1(detectIdentifierRule.setRule(rule)); 2038 break; 2039 2040 // check if hidden by DetectChar/AnyChar or another LineContinue 2041 case Context::Rule::Type::LineContinue: { 2042 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0)); 2043 2044 auto tables = CharTableArray(lineContinueChars, rule); 2045 updateUnreachable1(tables.find(rule.char0)); 2046 tables.removeNonSpecialWhenSpecial(); 2047 tables.append(rule.char0, rule); 2048 break; 2049 } 2050 2051 // check if hidden by DetectChar/AnyChar or another Detect2Chars/RangeDetect 2052 case Context::Rule::Type::Detect2Chars: 2053 case Context::Rule::Type::RangeDetect: 2054 updateUnreachable1(CharTableArray(detectChars, rule).find(rule.char0)); 2055 if (!isUnreachable) { 2056 RuleIterator ruleIterator(observedRules, observedRule); 2057 while (const auto *rulePtr = ruleIterator.next()) { 2058 if (isUnreachable) { 2059 break; 2060 } 2061 const auto &rule2 = *rulePtr; 2062 if (rule2.type == rule.type && isCompatible(rule2) && rule.char0 == rule2.char0 && rule.char1 == rule2.char1) { 2063 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2064 } 2065 } 2066 } 2067 break; 2068 2069 case Context::Rule::Type::RegExpr: { 2070 if (rule.isDotRegex) { 2071 dotRegex.append(rule, nullptr); 2072 break; 2073 } 2074 2075 // check that `rule` does not have another RegExpr as a prefix 2076 RuleIterator ruleIterator(observedRules, observedRule); 2077 while (const auto *rulePtr = ruleIterator.next()) { 2078 if (isUnreachable) { 2079 break; 2080 } 2081 const auto &rule2 = *rulePtr; 2082 if (rule2.type == Context::Rule::Type::RegExpr && isCompatible(rule2) && rule.insensitive == rule2.insensitive 2083 && rule.dynamic == rule2.dynamic && 
rule.sanitizedString.startsWith(rule2.sanitizedString)) { 2084 bool add = (rule.sanitizedString.startsWith(rule2.string) || rule.sanitizedString.size() < rule2.sanitizedString.size() + 2); 2085 if (!add) { 2086 // \s.* (sanitized = \s) is considered hiding \s*\S 2087 // we check the quantifiers to see if this is the case 2088 auto c1 = rule.sanitizedString[rule2.sanitizedString.size()].unicode(); 2089 auto c2 = rule.sanitizedString[rule2.sanitizedString.size() + 1].unicode(); 2090 auto c3 = rule2.sanitizedString.back().unicode(); 2091 if (c3 == '*' || c3 == '?' || c3 == '+') { 2092 add = true; 2093 } else if (c1 == '*' || c1 == '?') { 2094 add = !((c2 == '?' || c2 == '+') || (rule.sanitizedString.size() >= rule2.sanitizedString.size() + 3)); 2095 } else { 2096 add = true; 2097 } 2098 } 2099 if (add) { 2100 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2101 } 2102 } 2103 } 2104 2105 Q_FALLTHROUGH(); 2106 } 2107 // check if a rule does not have another rule as a prefix 2108 case Context::Rule::Type::WordDetect: 2109 case Context::Rule::Type::StringDetect: { 2110 // check that dynamic `rule` does not have another dynamic StringDetect as a prefix 2111 if (rule.type == Context::Rule::Type::StringDetect && rule.dynamic == XmlBool::True) { 2112 RuleIterator ruleIterator(observedRules, observedRule); 2113 while (const auto *rulePtr = ruleIterator.next()) { 2114 if (isUnreachable) { 2115 break; 2116 } 2117 2118 const auto &rule2 = *rulePtr; 2119 if (rule2.type != Context::Rule::Type::StringDetect || rule2.dynamic != XmlBool::True || !isCompatible(rule2)) { 2120 continue; 2121 } 2122 2123 const bool isSensitive = (rule2.insensitive == XmlBool::True); 2124 const auto caseSensitivity = isSensitive ? 
Qt::CaseInsensitive : Qt::CaseSensitive; 2125 if ((isSensitive || rule.insensitive != XmlBool::True) && rule.string.startsWith(rule2.string, caseSensitivity)) { 2126 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2127 } 2128 } 2129 } 2130 2131 // string used for comparison and truncated from "dynamic" part 2132 QStringView s = rule.string; 2133 2134 // truncate to '%' with dynamic rules 2135 if (rule.dynamic == XmlBool::True) { 2136 static const QRegularExpression dynamicPosition(QStringLiteral(R"(^(?:[^%]*|%(?![1-9]))*)")); 2137 auto result = dynamicPosition.match(rule.string); 2138 s = s.left(result.capturedLength()); 2139 } 2140 2141 QString sanitizedRegex; 2142 // truncate to special character with RegExpr. 2143 // If regexp contains '|', `s` becomes empty. 2144 if (rule.type == Context::Rule::Type::RegExpr) { 2145 static const QRegularExpression regularChars(QStringLiteral(R"(^(?:[^.?*+^$[{(\\|]+|\\[-.?*+^$[\]{}()\\|]+|\[[^^\\]\])+)")); 2146 static const QRegularExpression sanitizeChars(QStringLiteral(R"(\\([-.?*+^$[\]{}()\\|])|\[([^^\\])\])")); 2147 const qsizetype result = regularChars.match(rule.string).capturedLength(); 2148 const qsizetype pos = qMin(result, s.size()); 2149 if (rule.string.indexOf(QLatin1Char('|'), pos) < pos) { 2150 sanitizedRegex = rule.string.left(qMin(result, s.size())); 2151 sanitizedRegex.replace(sanitizeChars, QStringLiteral("\\1")); 2152 s = sanitizedRegex; 2153 } else { 2154 s = QStringView(); 2155 } 2156 } 2157 2158 // check if hidden by DetectChar/AnyChar 2159 if (s.size() > 0) { 2160 auto t = CharTableArray(detectChars, rule); 2161 if (rule.insensitive != XmlBool::True) { 2162 updateUnreachable1(t.find(s[0])); 2163 } else { 2164 QChar c2[]{s[0].toLower(), s[0].toUpper()}; 2165 updateUnreachable2(t.find(QStringView(c2, 2))); 2166 } 2167 } 2168 2169 // check if Detect2Chars, StringDetect, WordDetect is not a prefix of s 2170 if (s.size() > 0 && !isUnreachable) { 2171 // combination of uppercase and lowercase 
2172 RuleAndInclude detect2CharsInsensitives[]{{}, {}, {}, {}}; 2173 2174 RuleIterator ruleIterator(observedRules, observedRule); 2175 while (const auto *rulePtr = ruleIterator.next()) { 2176 if (isUnreachable) { 2177 break; 2178 } 2179 const auto &rule2 = *rulePtr; 2180 const bool isSensitive = (rule2.insensitive == XmlBool::True); 2181 const auto caseSensitivity = isSensitive ? Qt::CaseInsensitive : Qt::CaseSensitive; 2182 2183 switch (rule2.type) { 2184 // check that it is not a detectChars prefix 2185 case Context::Rule::Type::Detect2Chars: 2186 if (isCompatible(rule2) && s.size() >= 2) { 2187 if (rule.insensitive != XmlBool::True) { 2188 if (rule2.char0 == s[0] && rule2.char1 == s[1]) { 2189 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2190 } 2191 } else { 2192 // when the string is case insensitive, 2193 // all 4 upper/lower case combinations must be found 2194 auto set = [&](RuleAndInclude &x, QChar c1, QChar c2) { 2195 if (!x && rule2.char0 == c1 && rule2.char0 == c2) { 2196 x = {&rule2, ruleIterator.currentIncludeRules()}; 2197 } 2198 }; 2199 set(detect2CharsInsensitives[0], s[0].toLower(), s[1].toLower()); 2200 set(detect2CharsInsensitives[1], s[0].toLower(), s[1].toUpper()); 2201 set(detect2CharsInsensitives[2], s[0].toUpper(), s[1].toUpper()); 2202 set(detect2CharsInsensitives[3], s[0].toUpper(), s[1].toLower()); 2203 2204 if (detect2CharsInsensitives[0] && detect2CharsInsensitives[1] && detect2CharsInsensitives[2] 2205 && detect2CharsInsensitives[3]) { 2206 isUnreachable = true; 2207 unreachableBy.append(detect2CharsInsensitives[0]); 2208 unreachableBy.append(detect2CharsInsensitives[1]); 2209 unreachableBy.append(detect2CharsInsensitives[2]); 2210 unreachableBy.append(detect2CharsInsensitives[3]); 2211 } 2212 } 2213 } 2214 break; 2215 2216 // check that it is not a StringDetect prefix 2217 case Context::Rule::Type::StringDetect: 2218 if (isCompatible(rule2) && rule2.dynamic != XmlBool::True && (isSensitive || rule.insensitive != 
XmlBool::True) 2219 && s.startsWith(rule2.string, caseSensitivity)) { 2220 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2221 } 2222 break; 2223 2224 // check if a WordDetect is hidden by another WordDetect 2225 case Context::Rule::Type::WordDetect: 2226 if (rule.type == Context::Rule::Type::WordDetect && isCompatible(rule2) && (isSensitive || rule.insensitive != XmlBool::True) 2227 && 0 == rule.string.compare(rule2.string, caseSensitivity)) { 2228 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2229 } 2230 break; 2231 2232 default:; 2233 } 2234 } 2235 } 2236 2237 break; 2238 } 2239 2240 // check if hidden by another keyword rule 2241 case Context::Rule::Type::keyword: { 2242 RuleIterator ruleIterator(observedRules, observedRule); 2243 while (const auto *rulePtr = ruleIterator.next()) { 2244 if (isUnreachable) { 2245 break; 2246 } 2247 const auto &rule2 = *rulePtr; 2248 if (rule2.type == Context::Rule::Type::keyword && isCompatible(rule2) && rule.string == rule2.string) { 2249 updateUnreachable1({&rule2, ruleIterator.currentIncludeRules()}); 2250 } 2251 } 2252 // TODO check that all keywords are hidden by another rules 2253 break; 2254 } 2255 2256 // add characters in those used but without checking if they are already. 
2257 // <DetectChar char="}" /> 2258 // <includedRules .../> <- reference an another <DetectChar char="}" /> who will not be checked 2259 // <includedRules .../> <- reference a <DetectChar char="{" /> who will be added 2260 // <DetectChar char="{" /> <- hidden by previous rule 2261 case Context::Rule::Type::IncludeRules: 2262 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { 2263 break; 2264 } 2265 2266 if (auto &ruleAndInclude = includeContexts[rule.context.context]) { 2267 updateUnreachable1(ruleAndInclude); 2268 } else { 2269 ruleAndInclude.rule = &rule; 2270 } 2271 2272 for (const auto *rulePtr : rule.includedIncludeRules) { 2273 includeContexts.insert(rulePtr->context.context, RuleAndInclude{rulePtr, &rule}); 2274 } 2275 2276 if (observedRule.includeRules) { 2277 break; 2278 } 2279 2280 for (const auto *rulePtr : rule.includedRules) { 2281 const auto &rule2 = *rulePtr; 2282 switch (rule2.type) { 2283 case Context::Rule::Type::AnyChar: { 2284 auto tables = CharTableArray(detectChars, rule2); 2285 tables.removeNonSpecialWhenSpecial(); 2286 tables.append(rule2.string, rule2, &rule); 2287 break; 2288 } 2289 2290 case Context::Rule::Type::DetectChar: { 2291 auto &chars4 = (rule.dynamic != XmlBool::True) ? 
detectChars : dynamicDetectChars; 2292 auto tables = CharTableArray(chars4, rule2); 2293 tables.removeNonSpecialWhenSpecial(); 2294 tables.append(rule2.char0, rule2, &rule); 2295 break; 2296 } 2297 2298 case Context::Rule::Type::DetectSpaces: { 2299 auto tables = CharTableArray(detectChars, rule2); 2300 tables.removeNonSpecialWhenSpecial(); 2301 tables.append(QLatin1Char(' '), rule2, &rule); 2302 tables.append(QLatin1Char('\t'), rule2, &rule); 2303 break; 2304 } 2305 2306 case Context::Rule::Type::HlCChar: 2307 hlCCharRule.setRule(rule2, &rule); 2308 break; 2309 2310 case Context::Rule::Type::HlCHex: 2311 hlCHexRule.setRule(rule2, &rule); 2312 break; 2313 2314 case Context::Rule::Type::HlCOct: 2315 hlCOctRule.setRule(rule2, &rule); 2316 break; 2317 2318 case Context::Rule::Type::HlCStringChar: 2319 hlCStringCharRule.setRule(rule2, &rule); 2320 break; 2321 2322 case Context::Rule::Type::Int: 2323 intRule.setRule(rule2, &rule); 2324 break; 2325 2326 case Context::Rule::Type::Float: 2327 floatRule.setRule(rule2, &rule); 2328 break; 2329 2330 case Context::Rule::Type::LineContinue: { 2331 auto tables = CharTableArray(lineContinueChars, rule2); 2332 tables.removeNonSpecialWhenSpecial(); 2333 tables.append(rule2.char0, rule2, &rule); 2334 break; 2335 } 2336 2337 case Context::Rule::Type::RegExpr: 2338 if (rule2.isDotRegex) { 2339 dotRegex.append(rule2, &rule); 2340 } 2341 break; 2342 2343 case Context::Rule::Type::WordDetect: 2344 case Context::Rule::Type::StringDetect: 2345 case Context::Rule::Type::Detect2Chars: 2346 case Context::Rule::Type::IncludeRules: 2347 case Context::Rule::Type::DetectIdentifier: 2348 case Context::Rule::Type::keyword: 2349 case Context::Rule::Type::Unknown: 2350 case Context::Rule::Type::RangeDetect: 2351 break; 2352 } 2353 } 2354 break; 2355 2356 case Context::Rule::Type::Unknown: 2357 break; 2358 } 2359 2360 if (observedRule.includeRules && !observedRule.hasResolvedIncludeRules()) { 2361 auto &unreachableIncludedRule = 
unreachableIncludedRules[&rule]; 2362 if (isUnreachable && unreachableIncludedRule.alwaysUnreachable) { 2363 unreachableIncludedRule.unreachableBy.append(unreachableBy); 2364 } else { 2365 unreachableIncludedRule.alwaysUnreachable = false; 2366 } 2367 } else if (isUnreachable) { 2368 success = false; 2369 QString message; 2370 message.reserve(128); 2371 for (auto &ruleAndInclude : unreachableBy) { 2372 message += QStringLiteral("line "); 2373 if (ruleAndInclude.includeRules) { 2374 message += QString::number(ruleAndInclude.includeRules->line); 2375 message += QStringLiteral(" [by '"); 2376 message += ruleAndInclude.includeRules->context.name; 2377 message += QStringLiteral("' line "); 2378 message += QString::number(ruleAndInclude.rule->line); 2379 if (ruleAndInclude.includeRules->filename != ruleAndInclude.rule->filename) { 2380 message += QStringLiteral(" ("); 2381 message += ruleAndInclude.rule->filename; 2382 message += QLatin1Char(')'); 2383 } 2384 message += QLatin1Char(']'); 2385 } else { 2386 message += QString::number(ruleAndInclude.rule->line); 2387 } 2388 message += QStringLiteral(", "); 2389 } 2390 message.chop(2); 2391 qWarning() << filename << "line" << rule.line << "unreachable rule by" << message; 2392 } 2393 } 2394 2395 return success; 2396 } 2397 2398 //! Proposes to merge certain rule sequences 2399 //! - several DetectChar/AnyChar into AnyChar 2400 //! 
- several RegExpr into one RegExpr 2401 bool suggestRuleMerger(const QString &filename, const Context &context) const 2402 { 2403 bool success = true; 2404 2405 if (context.rules.isEmpty()) { 2406 return success; 2407 } 2408 2409 auto it = context.rules.begin(); 2410 const auto end = context.rules.end() - 1; 2411 2412 for (; it < end; ++it) { 2413 auto &rule1 = *it; 2414 auto &rule2 = it[1]; 2415 2416 auto isCommonCompatible = [&] { 2417 if (rule1.lookAhead != rule2.lookAhead) { 2418 return false; 2419 } 2420 // ignore attribute when lookAhead is true 2421 if (rule1.lookAhead != XmlBool::True && rule1.attribute != rule2.attribute) { 2422 return false; 2423 } 2424 // clang-format off 2425 return rule1.beginRegion == rule2.beginRegion 2426 && rule1.endRegion == rule2.endRegion 2427 && rule1.firstNonSpace == rule2.firstNonSpace 2428 && rule1.context.context == rule2.context.context 2429 && rule1.context.popCount == rule2.context.popCount; 2430 // clang-format on 2431 }; 2432 2433 switch (rule1.type) { 2434 // request to merge AnyChar/DetectChar 2435 case Context::Rule::Type::AnyChar: 2436 case Context::Rule::Type::DetectChar: 2437 if ((rule2.type == Context::Rule::Type::AnyChar || rule2.type == Context::Rule::Type::DetectChar) && isCommonCompatible() 2438 && rule1.column == rule2.column) { 2439 qWarning() << filename << "line" << rule2.line << "can be merged as AnyChar with the previous rule"; 2440 success = false; 2441 } 2442 break; 2443 2444 // request to merge multiple RegExpr 2445 case Context::Rule::Type::RegExpr: 2446 if (rule2.type == Context::Rule::Type::RegExpr && isCommonCompatible() && rule1.dynamic == rule2.dynamic 2447 && (rule1.column == rule2.column || (rule1.column <= 0 && rule2.column <= 0))) { 2448 qWarning() << filename << "line" << rule2.line << "can be merged with the previous rule"; 2449 success = false; 2450 } 2451 break; 2452 2453 case Context::Rule::Type::DetectSpaces: 2454 case Context::Rule::Type::HlCChar: 2455 case 
Context::Rule::Type::HlCHex: 2456 case Context::Rule::Type::HlCOct: 2457 case Context::Rule::Type::HlCStringChar: 2458 case Context::Rule::Type::Int: 2459 case Context::Rule::Type::Float: 2460 case Context::Rule::Type::LineContinue: 2461 case Context::Rule::Type::WordDetect: 2462 case Context::Rule::Type::StringDetect: 2463 case Context::Rule::Type::Detect2Chars: 2464 case Context::Rule::Type::IncludeRules: 2465 case Context::Rule::Type::DetectIdentifier: 2466 case Context::Rule::Type::keyword: 2467 case Context::Rule::Type::Unknown: 2468 case Context::Rule::Type::RangeDetect: 2469 break; 2470 } 2471 } 2472 2473 return success; 2474 } 2475 2476 //! Initialize the referenced context (ContextName::context) 2477 //! Some input / output examples are: 2478 //! - "#stay" -> "" 2479 //! - "#pop" -> "" 2480 //! - "Comment" -> "Comment" 2481 //! - "#pop!Comment" -> "Comment" 2482 //! - "##ISO C++" -> "" 2483 //! - "Comment##ISO C++"-> "Comment" in ISO C++ 2484 void resolveContextName(Definition &definition, Context &context, ContextName &contextName, int line) 2485 { 2486 QStringView name = contextName.name; 2487 if (name.isEmpty()) { 2488 contextName.stay = true; 2489 } else if (name.startsWith(QStringLiteral("#stay"))) { 2490 name = name.mid(5); 2491 contextName.stay = true; 2492 contextName.context = &context; 2493 if (!name.isEmpty()) { 2494 qWarning() << definition.filename << "line" << line << "invalid context in" << context.name; 2495 m_success = false; 2496 } 2497 } else { 2498 while (name.startsWith(QStringLiteral("#pop"))) { 2499 name = name.mid(4); 2500 ++contextName.popCount; 2501 } 2502 2503 if (contextName.popCount && !name.isEmpty()) { 2504 if (name.startsWith(QLatin1Char('!')) && name.size() > 1) { 2505 name = name.mid(1); 2506 } else { 2507 qWarning() << definition.filename << "line" << line << "'!' 
missing between '#pop' and context name" << context.name; 2508 m_success = false; 2509 } 2510 } 2511 2512 if (!name.isEmpty()) { 2513 const int idx = name.indexOf(QStringLiteral("##")); 2514 if (idx == -1) { 2515 auto it = definition.contexts.find(name.toString()); 2516 if (it != definition.contexts.end()) { 2517 contextName.context = &*it; 2518 } 2519 } else { 2520 auto defName = name.mid(idx + 2); 2521 auto it = m_definitions.find(defName.toString()); 2522 if (it != m_definitions.end()) { 2523 auto listName = name.left(idx).toString(); 2524 definition.referencedDefinitions.insert(&*it); 2525 auto ctxIt = it->contexts.find(listName.isEmpty() ? it->firstContextName : listName); 2526 if (ctxIt != it->contexts.end()) { 2527 contextName.context = &*ctxIt; 2528 } 2529 } else { 2530 qWarning() << definition.filename << "line" << line << "unknown definition in" << context.name; 2531 m_success = false; 2532 } 2533 } 2534 2535 if (!contextName.context) { 2536 qWarning() << definition.filename << "line" << line << "unknown context" << name << "in" << context.name; 2537 m_success = false; 2538 } 2539 } 2540 } 2541 } 2542 2543 QMap<QString, Definition> m_definitions; 2544 Definition *m_currentDefinition = nullptr; 2545 Keywords *m_currentKeywords = nullptr; 2546 Context *m_currentContext = nullptr; 2547 bool m_success = true; 2548 }; 2549 2550 namespace 2551 { 2552 QStringList readListing(const QString &fileName) 2553 { 2554 QFile file(fileName); 2555 if (!file.open(QIODevice::ReadOnly)) { 2556 return QStringList(); 2557 } 2558 2559 QXmlStreamReader xml(&file); 2560 QStringList listing; 2561 while (!xml.atEnd()) { 2562 xml.readNext(); 2563 2564 // add only .xml files, no .json or stuff 2565 if (xml.isCharacters() && xml.text().contains(QLatin1String(".xml"))) { 2566 listing.append(xml.text().toString()); 2567 } 2568 } 2569 2570 if (xml.hasError()) { 2571 qWarning() << "XML error while reading" << fileName << " - " << qPrintable(xml.errorString()) << "@ offset" << 
xml.characterOffset(); 2572 listing.clear(); 2573 } 2574 2575 return listing; 2576 } 2577 2578 /** 2579 * check if the "extensions" attribute have valid wildcards 2580 * @param extensions extensions string to check 2581 * @return valid? 2582 */ 2583 bool checkExtensions(QStringView extensions) 2584 { 2585 // get list of extensions 2586 const QList<QStringView> extensionParts = extensions.split(QLatin1Char(';'), Qt::SkipEmptyParts); 2587 2588 // ok if empty 2589 if (extensionParts.isEmpty()) { 2590 return true; 2591 } 2592 2593 // check that only valid wildcard things are inside the parts 2594 for (const auto &extension : extensionParts) { 2595 for (const auto c : extension) { 2596 // eat normal things 2597 if (c.isDigit() || c.isLetter()) { 2598 continue; 2599 } 2600 2601 // allow some special characters 2602 if (c == QLatin1Char('.') || c == QLatin1Char('-') || c == QLatin1Char('_') || c == QLatin1Char('+')) { 2603 continue; 2604 } 2605 2606 // only allowed wildcard things: '?' and '*' 2607 if (c == QLatin1Char('?') || c == QLatin1Char('*')) { 2608 continue; 2609 } 2610 2611 qWarning() << "invalid character" << c << "seen in extensions wildcard"; 2612 return false; 2613 } 2614 } 2615 2616 // all checks passed 2617 return true; 2618 } 2619 2620 } 2621 2622 int main(int argc, char *argv[]) 2623 { 2624 // get app instance 2625 QCoreApplication app(argc, argv); 2626 2627 // ensure enough arguments are passed 2628 if (app.arguments().size() < 3) { 2629 return 1; 2630 } 2631 2632 #ifdef QT_XMLPATTERNS_LIB 2633 // open schema 2634 QXmlSchema schema; 2635 if (!schema.load(QUrl::fromLocalFile(app.arguments().at(2)))) { 2636 return 2; 2637 } 2638 #endif 2639 2640 const QString hlFilenamesListing = app.arguments().value(3); 2641 if (hlFilenamesListing.isEmpty()) { 2642 return 1; 2643 } 2644 2645 QStringList hlFilenames = readListing(hlFilenamesListing); 2646 if (hlFilenames.isEmpty()) { 2647 qWarning("Failed to read %s", qPrintable(hlFilenamesListing)); 2648 return 3; 2649 } 
2650 2651 // text attributes 2652 const QStringList textAttributes = QStringList() << QStringLiteral("name") << QStringLiteral("section") << QStringLiteral("mimetype") 2653 << QStringLiteral("extensions") << QStringLiteral("style") << QStringLiteral("author") 2654 << QStringLiteral("license") << QStringLiteral("indenter"); 2655 2656 // index all given highlightings 2657 HlFilesChecker filesChecker; 2658 QVariantMap hls; 2659 int anyError = 0; 2660 for (const QString &hlFilename : std::as_const(hlFilenames)) { 2661 QFile hlFile(hlFilename); 2662 if (!hlFile.open(QIODevice::ReadOnly)) { 2663 qWarning("Failed to open %s", qPrintable(hlFilename)); 2664 anyError = 3; 2665 continue; 2666 } 2667 2668 #ifdef QT_XMLPATTERNS_LIB 2669 // validate against schema 2670 QXmlSchemaValidator validator(schema); 2671 if (!validator.validate(&hlFile, QUrl::fromLocalFile(hlFile.fileName()))) { 2672 anyError = 4; 2673 continue; 2674 } 2675 #endif 2676 2677 // read the needed attributes from toplevel language tag 2678 hlFile.reset(); 2679 QXmlStreamReader xml(&hlFile); 2680 if (xml.readNextStartElement()) { 2681 if (xml.name() != QLatin1String("language")) { 2682 anyError = 5; 2683 continue; 2684 } 2685 } else { 2686 anyError = 6; 2687 continue; 2688 } 2689 2690 // map to store hl info 2691 QVariantMap hl; 2692 2693 // transfer text attributes 2694 for (const QString &attribute : std::as_const(textAttributes)) { 2695 hl[attribute] = xml.attributes().value(attribute).toString(); 2696 } 2697 2698 // check if extensions have the right format 2699 if (!checkExtensions(hl[QStringLiteral("extensions")].toString())) { 2700 qWarning() << hlFilename << "'extensions' wildcards invalid:" << hl[QStringLiteral("extensions")].toString(); 2701 anyError = 23; 2702 } 2703 2704 // numerical attributes 2705 hl[QStringLiteral("version")] = xml.attributes().value(QLatin1String("version")).toInt(); 2706 hl[QStringLiteral("priority")] = xml.attributes().value(QLatin1String("priority")).toInt(); 2707 2708 // 
add boolean one 2709 hl[QStringLiteral("hidden")] = attrToBool(xml.attributes().value(QLatin1String("hidden"))); 2710 2711 // remember hl 2712 hls[QFileInfo(hlFile).fileName()] = hl; 2713 2714 const QString hlName = hl[QStringLiteral("name")].toString(); 2715 2716 filesChecker.setDefinition(xml.attributes().value(QStringLiteral("kateversion")), hlFilename, hlName); 2717 2718 // scan for broken regex or keywords with spaces 2719 while (!xml.atEnd()) { 2720 xml.readNext(); 2721 filesChecker.processElement(xml); 2722 } 2723 2724 if (xml.hasError()) { 2725 anyError = 33; 2726 qWarning() << hlFilename << "-" << xml.errorString() << "@ offset" << xml.characterOffset(); 2727 } 2728 } 2729 2730 filesChecker.resolveContexts(); 2731 2732 if (!filesChecker.check()) { 2733 anyError = 7; 2734 } 2735 2736 // bail out if any problem was seen 2737 if (anyError) { 2738 return anyError; 2739 } 2740 2741 // create outfile, after all has worked! 2742 QFile outFile(app.arguments().at(1)); 2743 if (!outFile.open(QIODevice::WriteOnly | QIODevice::Truncate)) { 2744 return 9; 2745 } 2746 2747 // write out json 2748 outFile.write(QCborValue::fromVariant(QVariant(hls)).toCbor()); 2749 2750 // be done 2751 return 0; 2752 }