File indexing completed on 2024-05-12 15:50:05

0001 /*
0002     SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "highlightingdata_p.hpp"
0008 #include "ksyntaxhighlighting_logging.h"
0009 #include "xml_p.h"
0010 
0011 #include <QXmlStreamReader>
0012 #include <QStringView>
0013 
0014 using namespace KSyntaxHighlighting;
0015 
/**
 * Constructs a @p Data object in place at the address of @p data using
 * placement new and brace initialization.
 *
 * No object previously living at that address is destroyed, so @p data must
 * refer to storage that does not currently hold a live object.
 *
 * The arguments are perfectly forwarded: rvalues are moved into the new
 * object, lvalues are copied and left untouched in the caller.
 *
 * @param data storage in which the object is constructed
 * @param args initializers passed to Data{...}
 */
template<class Data, class... Args>
static void initRuleData(Data &data, Args &&...args)
{
    // std::forward (not std::move): args are forwarding references, an
    // unconditional move would steal from lvalues owned by the caller
    new (&data) Data{std::forward<Args>(args)...};
}
0021 
0022 static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str)
0023 {
0024     return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive;
0025 }
0026 
0027 static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader)
0028 {
0029     return HighlightingContextData::Rule::WordDelimiters{
0030         reader.attributes().value(QLatin1String("additionalDeliminator")).toString(),
0031         reader.attributes().value(QLatin1String("weakDeliminator")).toString(),
0032     };
0033 }
0034 
0035 static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
0036 {
0037     if (!str.isEmpty()) {
0038         return true;
0039     }
0040 
0041     qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty";
0042     return false;
0043 }
0044 
0045 static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader)
0046 {
0047     if (str.size() == 1) {
0048         return true;
0049     }
0050 
0051     qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character";
0052     return false;
0053 }
0054 
0055 static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader)
0056 {
0057     using Rule = HighlightingContextData::Rule;
0058 
0059     QStringView name = reader.name();
0060     const auto attrs = reader.attributes();
0061     bool isIncludeRules = false;
0062 
0063     if (name == QLatin1String("DetectChar")) {
0064         const auto s = attrs.value(QLatin1String("char"));
0065         if (!checkIsChar(s, "char", defName, reader)) {
0066             return false;
0067         }
0068         const QChar c = s.at(0);
0069         const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
0070 
0071         initRuleData(rule.data.detectChar, c, dynamic);
0072         rule.type = Rule::Type::DetectChar;
0073     } else if (name == QLatin1String("RegExpr")) {
0074         const auto pattern = attrs.value(QLatin1String("String"));
0075         if (!checkIsNotEmpty(pattern, "String", defName, reader)) {
0076             return false;
0077         }
0078 
0079         const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
0080         const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal")));
0081         const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
0082 
0083         initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic);
0084         rule.type = Rule::Type::RegExpr;
0085     } else if (name == QLatin1String("IncludeRules")) {
0086         const auto context = attrs.value(QLatin1String("context"));
0087         if (!checkIsNotEmpty(context, "context", defName, reader)) {
0088             return false;
0089         }
0090         const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib")));
0091 
0092         initRuleData(rule.data.includeRules, context.toString(), includeAttribute);
0093         rule.type = Rule::Type::IncludeRules;
0094         isIncludeRules = true;
0095     } else if (name == QLatin1String("Detect2Chars")) {
0096         const auto s1 = attrs.value(QLatin1String("char"));
0097         const auto s2 = attrs.value(QLatin1String("char1"));
0098         if (!checkIsChar(s1, "char", defName, reader)) {
0099             return false;
0100         }
0101         if (!checkIsChar(s2, "char1", defName, reader)) {
0102             return false;
0103         }
0104 
0105         initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0));
0106         rule.type = Rule::Type::Detect2Chars;
0107     } else if (name == QLatin1String("keyword")) {
0108         const auto s = attrs.value(QLatin1String("String"));
0109         if (!checkIsNotEmpty(s, "String", defName, reader)) {
0110             return false;
0111         }
0112         Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive;
0113         bool hasCaseSensitivityOverride = false;
0114 
0115         /**
0116          * we might overwrite the case sensitivity
0117          * then we need to init the list for lookup of that sensitivity setting
0118          */
0119         if (attrs.hasAttribute(QLatin1String("insensitive"))) {
0120             hasCaseSensitivityOverride = true;
0121             caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
0122         }
0123 
0124         initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride);
0125         rule.type = Rule::Type::Keyword;
0126     } else if (name == QLatin1String("DetectSpaces")) {
0127         rule.type = Rule::Type::DetectSpaces;
0128     } else if (name == QLatin1String("StringDetect")) {
0129         const auto string = attrs.value(QLatin1String("String"));
0130         if (!checkIsNotEmpty(string, "String", defName, reader)) {
0131             return false;
0132         }
0133         const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
0134         const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic")));
0135         const bool isSensitive = (caseSensitivity == Qt::CaseSensitive);
0136 
0137         // String can be replaced with DetectChar or AnyChar
0138         if (!dynamic && string.size() == 1) {
0139             QChar c = string.at(0);
0140             if (isSensitive || c.toLower() == c.toUpper()) {
0141                 initRuleData(rule.data.detectChar, c, dynamic);
0142                 rule.type = Rule::Type::DetectChar;
0143             } else {
0144                 initRuleData(rule.data.anyChar, c.toLower() + c.toUpper());
0145                 rule.type = Rule::Type::AnyChar;
0146             }
0147         }
0148         // String can be replaced with Detect2Chars
0149         else if (isSensitive && !dynamic && string.size() == 2) {
0150             initRuleData(rule.data.detect2Chars, string.at(0), string.at(1));
0151             rule.type = Rule::Type::Detect2Chars;
0152         } else {
0153             initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic);
0154             rule.type = Rule::Type::StringDetect;
0155         }
0156     } else if (name == QLatin1String("WordDetect")) {
0157         const auto word = attrs.value(QLatin1String("String"));
0158         if (!checkIsNotEmpty(word, "String", defName, reader)) {
0159             return false;
0160         }
0161         const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive")));
0162 
0163         initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity);
0164         rule.type = Rule::Type::WordDetect;
0165     } else if (name == QLatin1String("AnyChar")) {
0166         const auto chars = attrs.value(QLatin1String("String"));
0167         if (!checkIsNotEmpty(chars, "String", defName, reader)) {
0168             return false;
0169         }
0170 
0171         // AnyChar can be replaced with DetectChar
0172         if (chars.size() == 1) {
0173             initRuleData(rule.data.detectChar, chars.at(0), false);
0174             rule.type = Rule::Type::DetectChar;
0175         } else {
0176             initRuleData(rule.data.anyChar, chars.toString());
0177             rule.type = Rule::Type::AnyChar;
0178         }
0179     } else if (name == QLatin1String("DetectIdentifier")) {
0180         rule.type = Rule::Type::DetectIdentifier;
0181     } else if (name == QLatin1String("LineContinue")) {
0182         const auto s = attrs.value(QLatin1String("char"));
0183         const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0);
0184 
0185         initRuleData(rule.data.lineContinue, c);
0186         rule.type = Rule::Type::LineContinue;
0187     } else if (name == QLatin1String("Int")) {
0188         initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader));
0189         rule.type = Rule::Type::Int;
0190     } else if (name == QLatin1String("Float")) {
0191         initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader));
0192         rule.type = Rule::Type::Float;
0193     } else if (name == QLatin1String("HlCStringChar")) {
0194         rule.type = Rule::Type::HlCStringChar;
0195     } else if (name == QLatin1String("RangeDetect")) {
0196         const auto s1 = attrs.value(QLatin1String("char"));
0197         const auto s2 = attrs.value(QLatin1String("char1"));
0198         if (!checkIsChar(s1, "char", defName, reader)) {
0199             return false;
0200         }
0201         if (!checkIsChar(s2, "char1", defName, reader)) {
0202             return false;
0203         }
0204 
0205         initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0));
0206         rule.type = Rule::Type::RangeDetect;
0207     } else if (name == QLatin1String("HlCHex")) {
0208         initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader));
0209         rule.type = Rule::Type::HlCHex;
0210     } else if (name == QLatin1String("HlCChar")) {
0211         rule.type = Rule::Type::HlCChar;
0212     } else if (name == QLatin1String("HlCOct")) {
0213         initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader));
0214         rule.type = Rule::Type::HlCOct;
0215     } else {
0216         qCWarning(Log) << "Unknown rule type:" << name;
0217         return false;
0218     }
0219 
0220     if (!isIncludeRules) {
0221         rule.common.contextName = attrs.value(QLatin1String("context")).toString();
0222         rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString();
0223         rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString();
0224         rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace")));
0225         rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead")));
0226         // attribute is only used when lookAhead is false
0227         if (!rule.common.lookAhead) {
0228             rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString();
0229         }
0230         bool colOk = false;
0231         rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk);
0232         if (!colOk) {
0233             rule.common.column = -1;
0234         }
0235     }
0236 
0237     return true;
0238 }
0239 
0240 template<class Data1, class Data2, class Visitor>
0241 static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor)
0242 {
0243     using Rule = HighlightingContextData::Rule;
0244     using Type = Rule::Type;
0245     switch (type) {
0246     case Type::AnyChar:
0247         visitor(data1.anyChar, data2.anyChar);
0248         break;
0249     case Type::DetectChar:
0250         visitor(data1.detectChar, data2.detectChar);
0251         break;
0252     case Type::Detect2Chars:
0253         visitor(data1.detect2Chars, data2.detect2Chars);
0254         break;
0255     case Type::HlCOct:
0256         visitor(data1.hlCOct, data2.hlCOct);
0257         break;
0258     case Type::IncludeRules:
0259         visitor(data1.includeRules, data2.includeRules);
0260         break;
0261     case Type::Int:
0262         visitor(data1.detectInt, data2.detectInt);
0263         break;
0264     case Type::Keyword:
0265         visitor(data1.keyword, data2.keyword);
0266         break;
0267     case Type::LineContinue:
0268         visitor(data1.lineContinue, data2.lineContinue);
0269         break;
0270     case Type::RangeDetect:
0271         visitor(data1.rangeDetect, data2.rangeDetect);
0272         break;
0273     case Type::RegExpr:
0274         visitor(data1.regExpr, data2.regExpr);
0275         break;
0276     case Type::StringDetect:
0277         visitor(data1.stringDetect, data2.stringDetect);
0278         break;
0279     case Type::WordDetect:
0280         visitor(data1.wordDetect, data2.wordDetect);
0281         break;
0282     case Type::Float:
0283         visitor(data1.detectFloat, data2.detectFloat);
0284         break;
0285     case Type::HlCHex:
0286         visitor(data1.hlCHex, data2.hlCHex);
0287         break;
0288 
0289     case Type::HlCStringChar:
0290     case Type::DetectIdentifier:
0291     case Type::DetectSpaces:
0292     case Type::HlCChar:
0293     case Type::Unknown:;
0294     }
0295 }
0296 
/// Default constructor, explicitly defaulted here (out of line) and declared noexcept.
HighlightingContextData::Rule::Rule() noexcept = default;
0298 
0299 HighlightingContextData::Rule::Rule(Rule &&other) noexcept
0300     : common(std::move(other.common))
0301 {
0302     dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
0303         using Data = std::remove_reference_t<decltype(data1)>;
0304         new (&data1) Data(std::move(data2));
0305     });
0306     type = other.type;
0307 }
0308 
0309 HighlightingContextData::Rule::Rule(const Rule &other)
0310     : common(other.common)
0311 {
0312     dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) {
0313         using Data = std::remove_reference_t<decltype(data1)>;
0314         new (&data1) Data(data2);
0315     });
0316     type = other.type;
0317 }
0318 
0319 HighlightingContextData::Rule::~Rule()
0320 {
0321     dataRuleVisit(type, data, data, [](auto &data, auto &) {
0322         using Data = std::remove_reference_t<decltype(data)>;
0323         data.~Data();
0324     });
0325 }
0326 
0327 HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str)
0328 {
0329     if (str.isEmpty() || str == QStringLiteral("#stay")) {
0330         return;
0331     }
0332 
0333     while (str.startsWith(QStringLiteral("#pop"))) {
0334         ++m_popCount;
0335         if (str.size() > 4 && str.at(4) == QLatin1Char('!')) {
0336             str = str.mid(5);
0337             break;
0338         }
0339         str = str.mid(4);
0340     }
0341 
0342     if (str.isEmpty()) {
0343         return;
0344     }
0345 
0346     m_contextAndDefName = str.toString();
0347     m_defNameIndex = str.indexOf(QStringLiteral("##"));
0348 }
0349 
0350 bool HighlightingContextData::ContextSwitch::isStay() const
0351 {
0352     return m_popCount == -1 && m_contextAndDefName.isEmpty();
0353 }
0354 
0355 QStringView HighlightingContextData::ContextSwitch::contextName() const
0356 {
0357     if (m_defNameIndex == -1) {
0358         return m_contextAndDefName;
0359     }
0360     return QStringView(m_contextAndDefName).left(m_defNameIndex);
0361 }
0362 
0363 QStringView HighlightingContextData::ContextSwitch::defName() const
0364 {
0365     if (m_defNameIndex == -1) {
0366         return QStringView();
0367     }
0368     return QStringView(m_contextAndDefName).mid(m_defNameIndex + 2);
0369 }
0370 
0371 void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader)
0372 {
0373     Q_ASSERT(reader.name() == QLatin1String("context"));
0374     Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement);
0375 
0376     name = reader.attributes().value(QLatin1String("name")).toString();
0377     attribute = reader.attributes().value(QLatin1String("attribute")).toString();
0378     lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString();
0379     lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString();
0380     fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString();
0381     noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding")));
0382 
0383     rules.reserve(8);
0384 
0385     reader.readNext();
0386     while (!reader.atEnd()) {
0387         switch (reader.tokenType()) {
0388         case QXmlStreamReader::StartElement: {
0389             auto &rule = rules.emplace_back();
0390             if (!loadRule(defName, rule, reader)) {
0391                 rules.pop_back();
0392             }
0393             // be done with this rule, skip all subelements, e.g. no longer supported sub-rules
0394             reader.skipCurrentElement();
0395             reader.readNext();
0396             break;
0397         }
0398         case QXmlStreamReader::EndElement:
0399             return;
0400         default:
0401             reader.readNext();
0402             break;
0403         }
0404     }
0405 }