File indexing completed on 2024-05-12 15:50:05
0001 /* 0002 SPDX-FileCopyrightText: 2021 Jonathan Poelen <jonathan.poelen@gmail.com> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 #include "highlightingdata_p.hpp" 0008 #include "ksyntaxhighlighting_logging.h" 0009 #include "xml_p.h" 0010 0011 #include <QXmlStreamReader> 0012 #include <QStringView> 0013 0014 using namespace KSyntaxHighlighting; 0015 0016 template<class Data, class... Args> 0017 static void initRuleData(Data &data, Args &&...args) 0018 { 0019 new (&data) Data{std::move(args)...}; 0020 } 0021 0022 static Qt::CaseSensitivity attrToCaseSensitivity(QStringView str) 0023 { 0024 return Xml::attrToBool(str) ? Qt::CaseInsensitive : Qt::CaseSensitive; 0025 } 0026 0027 static HighlightingContextData::Rule::WordDelimiters loadAdditionalWordDelimiters(QXmlStreamReader &reader) 0028 { 0029 return HighlightingContextData::Rule::WordDelimiters{ 0030 reader.attributes().value(QLatin1String("additionalDeliminator")).toString(), 0031 reader.attributes().value(QLatin1String("weakDeliminator")).toString(), 0032 }; 0033 } 0034 0035 static bool checkIsNotEmpty(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) 0036 { 0037 if (!str.isEmpty()) { 0038 return true; 0039 } 0040 0041 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute is empty"; 0042 return false; 0043 } 0044 0045 static bool checkIsChar(QStringView str, const char *attrName, const QString &defName, QXmlStreamReader &reader) 0046 { 0047 if (str.size() == 1) { 0048 return true; 0049 } 0050 0051 qCWarning(Log) << defName << "at line" << reader.lineNumber() << ": " << attrName << "attribute must contain exactly 1 character"; 0052 return false; 0053 } 0054 0055 static bool loadRule(const QString &defName, HighlightingContextData::Rule &rule, QXmlStreamReader &reader) 0056 { 0057 using Rule = HighlightingContextData::Rule; 0058 0059 QStringView name = reader.name(); 0060 const auto attrs = reader.attributes(); 0061 bool isIncludeRules = false; 0062 0063 if (name == QLatin1String("DetectChar")) { 0064 const auto s = attrs.value(QLatin1String("char")); 0065 if (!checkIsChar(s, "char", defName, reader)) { 0066 return false; 0067 } 0068 const QChar c = s.at(0); 0069 const bool dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic"))); 0070 0071 initRuleData(rule.data.detectChar, c, dynamic); 0072 rule.type = Rule::Type::DetectChar; 0073 } else if (name == QLatin1String("RegExpr")) { 0074 const auto pattern = attrs.value(QLatin1String("String")); 0075 if (!checkIsNotEmpty(pattern, "String", defName, reader)) { 0076 return false; 0077 } 0078 0079 const auto isCaseInsensitive = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive"))); 0080 const auto isMinimal = Xml::attrToBool(attrs.value(QLatin1String("minimal"))); 0081 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic"))); 0082 0083 initRuleData(rule.data.regExpr, pattern.toString(), isCaseInsensitive, isMinimal, dynamic); 0084 rule.type = Rule::Type::RegExpr; 0085 } else if (name == QLatin1String("IncludeRules")) { 0086 const auto context = attrs.value(QLatin1String("context")); 0087 if (!checkIsNotEmpty(context, "context", defName, reader)) { 0088 return false; 0089 } 0090 const bool includeAttribute = Xml::attrToBool(attrs.value(QLatin1String("includeAttrib"))); 0091 0092 initRuleData(rule.data.includeRules, context.toString(), includeAttribute); 0093 rule.type = Rule::Type::IncludeRules; 0094 isIncludeRules = true; 0095 } else if (name == QLatin1String("Detect2Chars")) { 0096 const auto s1 = attrs.value(QLatin1String("char")); 0097 const auto s2 = attrs.value(QLatin1String("char1")); 0098 if (!checkIsChar(s1, "char", defName, reader)) { 0099 return false; 0100 } 0101 if (!checkIsChar(s2, "char1", defName, reader)) { 0102 return false; 0103 } 0104 0105 initRuleData(rule.data.detect2Chars, s1.at(0), s2.at(0)); 0106 rule.type = Rule::Type::Detect2Chars; 0107 } else if (name == QLatin1String("keyword")) { 0108 const auto s = attrs.value(QLatin1String("String")); 0109 if (!checkIsNotEmpty(s, "String", defName, reader)) { 0110 return false; 0111 } 0112 Qt::CaseSensitivity caseSensitivityOverride = Qt::CaseInsensitive; 0113 bool hasCaseSensitivityOverride = false; 0114 0115 /** 0116 * we might overwrite the case sensitivity 0117 * then we need to init the list for lookup of that sensitivity setting 0118 */ 0119 if (attrs.hasAttribute(QLatin1String("insensitive"))) { 0120 hasCaseSensitivityOverride = true; 0121 caseSensitivityOverride = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive"))); 0122 } 0123 0124 initRuleData(rule.data.keyword, s.toString(), loadAdditionalWordDelimiters(reader), caseSensitivityOverride, hasCaseSensitivityOverride); 0125 rule.type = Rule::Type::Keyword; 0126 } else if (name == QLatin1String("DetectSpaces")) { 0127 rule.type = Rule::Type::DetectSpaces; 0128 } else if (name == QLatin1String("StringDetect")) { 0129 const auto string = attrs.value(QLatin1String("String")); 0130 if (!checkIsNotEmpty(string, "String", defName, reader)) { 0131 return false; 0132 } 0133 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive"))); 0134 const auto dynamic = Xml::attrToBool(attrs.value(QLatin1String("dynamic"))); 0135 const bool isSensitive = (caseSensitivity == Qt::CaseSensitive); 0136 0137 // String can be replaced with DetectChar or AnyChar 0138 if (!dynamic && string.size() == 1) { 0139 QChar c = string.at(0); 0140 if (isSensitive || c.toLower() == c.toUpper()) { 0141 initRuleData(rule.data.detectChar, c, dynamic); 0142 rule.type = Rule::Type::DetectChar; 0143 } else { 0144 initRuleData(rule.data.anyChar, c.toLower() + c.toUpper()); 0145 rule.type = Rule::Type::AnyChar; 0146 } 0147 } 0148 // String can be replaced with Detect2Chars 0149 else if (isSensitive && !dynamic && string.size() == 2) { 0150 initRuleData(rule.data.detect2Chars, string.at(0), string.at(1)); 0151 rule.type = Rule::Type::Detect2Chars; 0152 } else { 0153 initRuleData(rule.data.stringDetect, string.toString(), caseSensitivity, dynamic); 0154 rule.type = Rule::Type::StringDetect; 0155 } 0156 } else if (name == QLatin1String("WordDetect")) { 0157 const auto word = attrs.value(QLatin1String("String")); 0158 if (!checkIsNotEmpty(word, "String", defName, reader)) { 0159 return false; 0160 } 0161 const auto caseSensitivity = attrToCaseSensitivity(attrs.value(QLatin1String("insensitive"))); 0162 0163 initRuleData(rule.data.wordDetect, word.toString(), loadAdditionalWordDelimiters(reader), caseSensitivity); 0164 rule.type = Rule::Type::WordDetect; 0165 } else if (name == QLatin1String("AnyChar")) { 0166 const auto chars = attrs.value(QLatin1String("String")); 0167 if (!checkIsNotEmpty(chars, "String", defName, reader)) { 0168 return false; 0169 } 0170 0171 // AnyChar can be replaced with DetectChar 0172 if (chars.size() == 1) { 0173 initRuleData(rule.data.detectChar, chars.at(0), false); 0174 rule.type = Rule::Type::DetectChar; 0175 } else { 0176 initRuleData(rule.data.anyChar, chars.toString()); 0177 rule.type = Rule::Type::AnyChar; 0178 } 0179 } else if (name == QLatin1String("DetectIdentifier")) { 0180 rule.type = Rule::Type::DetectIdentifier; 0181 } else if (name == QLatin1String("LineContinue")) { 0182 const auto s = attrs.value(QLatin1String("char")); 0183 const QChar c = s.isEmpty() ? QLatin1Char('\\') : s.at(0); 0184 0185 initRuleData(rule.data.lineContinue, c); 0186 rule.type = Rule::Type::LineContinue; 0187 } else if (name == QLatin1String("Int")) { 0188 initRuleData(rule.data.detectInt, loadAdditionalWordDelimiters(reader)); 0189 rule.type = Rule::Type::Int; 0190 } else if (name == QLatin1String("Float")) { 0191 initRuleData(rule.data.detectFloat, loadAdditionalWordDelimiters(reader)); 0192 rule.type = Rule::Type::Float; 0193 } else if (name == QLatin1String("HlCStringChar")) { 0194 rule.type = Rule::Type::HlCStringChar; 0195 } else if (name == QLatin1String("RangeDetect")) { 0196 const auto s1 = attrs.value(QLatin1String("char")); 0197 const auto s2 = attrs.value(QLatin1String("char1")); 0198 if (!checkIsChar(s1, "char", defName, reader)) { 0199 return false; 0200 } 0201 if (!checkIsChar(s2, "char1", defName, reader)) { 0202 return false; 0203 } 0204 0205 initRuleData(rule.data.rangeDetect, s1.at(0), s2.at(0)); 0206 rule.type = Rule::Type::RangeDetect; 0207 } else if (name == QLatin1String("HlCHex")) { 0208 initRuleData(rule.data.hlCHex, loadAdditionalWordDelimiters(reader)); 0209 rule.type = Rule::Type::HlCHex; 0210 } else if (name == QLatin1String("HlCChar")) { 0211 rule.type = Rule::Type::HlCChar; 0212 } else if (name == QLatin1String("HlCOct")) { 0213 initRuleData(rule.data.hlCOct, loadAdditionalWordDelimiters(reader)); 0214 rule.type = Rule::Type::HlCOct; 0215 } else { 0216 qCWarning(Log) << "Unknown rule type:" << name; 0217 return false; 0218 } 0219 0220 if (!isIncludeRules) { 0221 rule.common.contextName = attrs.value(QLatin1String("context")).toString(); 0222 rule.common.beginRegionName = attrs.value(QLatin1String("beginRegion")).toString(); 0223 rule.common.endRegionName = attrs.value(QLatin1String("endRegion")).toString(); 0224 rule.common.firstNonSpace = Xml::attrToBool(attrs.value(QLatin1String("firstNonSpace"))); 0225 rule.common.lookAhead = Xml::attrToBool(attrs.value(QLatin1String("lookAhead"))); 0226 // attribute is only used when lookAhead is false 0227 if (!rule.common.lookAhead) { 0228 rule.common.attributeName = attrs.value(QLatin1String("attribute")).toString(); 0229 } 0230 bool colOk = false; 0231 rule.common.column = attrs.value(QLatin1String("column")).toInt(&colOk); 0232 if (!colOk) { 0233 rule.common.column = -1; 0234 } 0235 } 0236 0237 return true; 0238 } 0239 0240 template<class Data1, class Data2, class Visitor> 0241 static void dataRuleVisit(HighlightingContextData::Rule::Type type, Data1 &&data1, Data2 &&data2, Visitor &&visitor) 0242 { 0243 using Rule = HighlightingContextData::Rule; 0244 using Type = Rule::Type; 0245 switch (type) { 0246 case Type::AnyChar: 0247 visitor(data1.anyChar, data2.anyChar); 0248 break; 0249 case Type::DetectChar: 0250 visitor(data1.detectChar, data2.detectChar); 0251 break; 0252 case Type::Detect2Chars: 0253 visitor(data1.detect2Chars, data2.detect2Chars); 0254 break; 0255 case Type::HlCOct: 0256 visitor(data1.hlCOct, data2.hlCOct); 0257 break; 0258 case Type::IncludeRules: 0259 visitor(data1.includeRules, data2.includeRules); 0260 break; 0261 case Type::Int: 0262 visitor(data1.detectInt, data2.detectInt); 0263 break; 0264 case Type::Keyword: 0265 visitor(data1.keyword, data2.keyword); 0266 break; 0267 case Type::LineContinue: 0268 visitor(data1.lineContinue, data2.lineContinue); 0269 break; 0270 case Type::RangeDetect: 0271 visitor(data1.rangeDetect, data2.rangeDetect); 0272 break; 0273 case Type::RegExpr: 0274 visitor(data1.regExpr, data2.regExpr); 0275 break; 0276 case Type::StringDetect: 0277 visitor(data1.stringDetect, data2.stringDetect); 0278 break; 0279 case Type::WordDetect: 0280 visitor(data1.wordDetect, data2.wordDetect); 0281 break; 0282 case Type::Float: 0283 visitor(data1.detectFloat, data2.detectFloat); 0284 break; 0285 case Type::HlCHex: 0286 visitor(data1.hlCHex, data2.hlCHex); 0287 break; 0288 0289 case Type::HlCStringChar: 0290 case Type::DetectIdentifier: 0291 case Type::DetectSpaces: 0292 case Type::HlCChar: 0293 case Type::Unknown:; 0294 } 0295 } 0296 0297 HighlightingContextData::Rule::Rule() noexcept = default; 0298 0299 HighlightingContextData::Rule::Rule(Rule &&other) noexcept 0300 : common(std::move(other.common)) 0301 { 0302 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) { 0303 using Data = std::remove_reference_t<decltype(data1)>; 0304 new (&data1) Data(std::move(data2)); 0305 }); 0306 type = other.type; 0307 } 0308 0309 HighlightingContextData::Rule::Rule(const Rule &other) 0310 : common(other.common) 0311 { 0312 dataRuleVisit(other.type, data, other.data, [](auto &data1, auto &data2) { 0313 using Data = std::remove_reference_t<decltype(data1)>; 0314 new (&data1) Data(data2); 0315 }); 0316 type = other.type; 0317 } 0318 0319 HighlightingContextData::Rule::~Rule() 0320 { 0321 dataRuleVisit(type, data, data, [](auto &data, auto &) { 0322 using Data = std::remove_reference_t<decltype(data)>; 0323 data.~Data(); 0324 }); 0325 } 0326 0327 HighlightingContextData::ContextSwitch::ContextSwitch(QStringView str) 0328 { 0329 if (str.isEmpty() || str == QStringLiteral("#stay")) { 0330 return; 0331 } 0332 0333 while (str.startsWith(QStringLiteral("#pop"))) { 0334 ++m_popCount; 0335 if (str.size() > 4 && str.at(4) == QLatin1Char('!')) { 0336 str = str.mid(5); 0337 break; 0338 } 0339 str = str.mid(4); 0340 } 0341 0342 if (str.isEmpty()) { 0343 return; 0344 } 0345 0346 m_contextAndDefName = str.toString(); 0347 m_defNameIndex = str.indexOf(QStringLiteral("##")); 0348 } 0349 0350 bool HighlightingContextData::ContextSwitch::isStay() const 0351 { 0352 return m_popCount == -1 && m_contextAndDefName.isEmpty(); 0353 } 0354 0355 QStringView HighlightingContextData::ContextSwitch::contextName() const 0356 { 0357 if (m_defNameIndex == -1) { 0358 return m_contextAndDefName; 0359 } 0360 return QStringView(m_contextAndDefName).left(m_defNameIndex); 0361 } 0362 0363 QStringView HighlightingContextData::ContextSwitch::defName() const 0364 { 0365 if (m_defNameIndex == -1) { 0366 return QStringView(); 0367 } 0368 return QStringView(m_contextAndDefName).mid(m_defNameIndex + 2); 0369 } 0370 0371 void HighlightingContextData::load(const QString &defName, QXmlStreamReader &reader) 0372 { 0373 Q_ASSERT(reader.name() == QLatin1String("context")); 0374 Q_ASSERT(reader.tokenType() == QXmlStreamReader::StartElement); 0375 0376 name = reader.attributes().value(QLatin1String("name")).toString(); 0377 attribute = reader.attributes().value(QLatin1String("attribute")).toString(); 0378 lineEndContext = reader.attributes().value(QLatin1String("lineEndContext")).toString(); 0379 lineEmptyContext = reader.attributes().value(QLatin1String("lineEmptyContext")).toString(); 0380 fallthroughContext = reader.attributes().value(QLatin1String("fallthroughContext")).toString(); 0381 noIndentationBasedFolding = Xml::attrToBool(reader.attributes().value(QLatin1String("noIndentationBasedFolding"))); 0382 0383 rules.reserve(8); 0384 0385 reader.readNext(); 0386 while (!reader.atEnd()) { 0387 switch (reader.tokenType()) { 0388 case QXmlStreamReader::StartElement: { 0389 auto &rule = rules.emplace_back(); 0390 if (!loadRule(defName, rule, reader)) { 0391 rules.pop_back(); 0392 } 0393 // be done with this rule, skip all subelements, e.g. no longer supported sub-rules 0394 reader.skipCurrentElement(); 0395 reader.readNext(); 0396 break; 0397 } 0398 case QXmlStreamReader::EndElement: 0399 return; 0400 default: 0401 reader.readNext(); 0402 break; 0403 } 0404 } 0405 }