File indexing completed on 2024-05-12 04:02:16
0001 /* 0002 SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org> 0003 0004 SPDX-License-Identifier: MIT 0005 */ 0006 0007 #include "abstracthighlighter.h" 0008 #include "abstracthighlighter_p.h" 0009 #include "context_p.h" 0010 #include "definition_p.h" 0011 #include "foldingregion.h" 0012 #include "format.h" 0013 #include "ksyntaxhighlighting_logging.h" 0014 #include "repository.h" 0015 #include "repository_p.h" 0016 #include "rule_p.h" 0017 #include "state.h" 0018 #include "state_p.h" 0019 #include "theme.h" 0020 0021 using namespace KSyntaxHighlighting; 0022 0023 AbstractHighlighterPrivate::AbstractHighlighterPrivate() 0024 { 0025 } 0026 0027 AbstractHighlighterPrivate::~AbstractHighlighterPrivate() 0028 { 0029 } 0030 0031 void AbstractHighlighterPrivate::ensureDefinitionLoaded() 0032 { 0033 auto defData = DefinitionData::get(m_definition); 0034 if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) { 0035 qCDebug(Log) << "Definition became invalid, trying re-lookup."; 0036 m_definition = defData->repo->definitionForName(m_definition.name()); 0037 defData = DefinitionData::get(m_definition); 0038 } 0039 0040 if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) { 0041 qCCritical(Log) << "Repository got deleted while a highlighter is still active!"; 0042 } 0043 0044 if (m_definition.isValid()) { 0045 defData->load(); 0046 } 0047 } 0048 0049 AbstractHighlighter::AbstractHighlighter() 0050 : d_ptr(new AbstractHighlighterPrivate) 0051 { 0052 } 0053 0054 AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd) 0055 : d_ptr(dd) 0056 { 0057 } 0058 0059 AbstractHighlighter::~AbstractHighlighter() 0060 { 0061 delete d_ptr; 0062 } 0063 0064 Definition AbstractHighlighter::definition() const 0065 { 0066 return d_ptr->m_definition; 0067 } 0068 0069 void AbstractHighlighter::setDefinition(const Definition &def) 0070 { 0071 Q_D(AbstractHighlighter); 0072 d->m_definition = def; 0073 } 0074 0075 Theme AbstractHighlighter::theme() const 0076 { 0077 Q_D(const AbstractHighlighter); 0078 return d->m_theme; 0079 } 0080 0081 void AbstractHighlighter::setTheme(const Theme &theme) 0082 { 0083 Q_D(AbstractHighlighter); 0084 d->m_theme = theme; 0085 } 0086 0087 /** 0088 * Returns the index of the first non-space character. If the line is empty, 0089 * or only contains white spaces, text.size() is returned. 0090 */ 0091 static inline int firstNonSpaceChar(QStringView text) 0092 { 0093 for (int i = 0; i < text.length(); ++i) { 0094 if (!text[i].isSpace()) { 0095 return i; 0096 } 0097 } 0098 return text.size(); 0099 } 0100 0101 State AbstractHighlighter::highlightLine(QStringView text, const State &state) 0102 { 0103 Q_D(AbstractHighlighter); 0104 0105 // verify definition, deal with no highlighting being enabled 0106 d->ensureDefinitionLoaded(); 0107 const auto defData = DefinitionData::get(d->m_definition); 0108 if (!d->m_definition.isValid() || !defData->isLoaded()) { 0109 applyFormat(0, text.size(), Format()); 0110 return State(); 0111 } 0112 0113 // limit the cache for unification to some reasonable size 0114 // we use here at the moment 64k elements to not hog too much memory 0115 // and to make the clearing no big stall 0116 if (defData->unify.size() > 64 * 1024) 0117 defData->unify.clear(); 0118 0119 // verify/initialize state 0120 auto newState = state; 0121 auto stateData = StateData::get(newState); 0122 bool isSharedData = true; 0123 if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) { 0124 qCDebug(Log) << "Got invalid state, resetting."; 0125 stateData = nullptr; 0126 } 0127 if (Q_UNLIKELY(!stateData)) { 0128 stateData = StateData::reset(newState); 0129 stateData->push(defData->initialContext(), QStringList()); 0130 stateData->m_defId = defData->id; 0131 isSharedData = false; 0132 } 0133 0134 // process empty lines 0135 if (Q_UNLIKELY(text.isEmpty())) { 0136 /** 0137 * handle line empty context switches 0138 * guard against endless loops 0139 * see https://phabricator.kde.org/D18509 0140 */ 0141 int endlessLoopingCounter = 0; 0142 while (!stateData->topContext()->lineEmptyContext().isStay()) { 0143 /** 0144 * line empty context switches 0145 */ 0146 if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) { 0147 /** 0148 * end when trying to #pop the main context 0149 */ 0150 break; 0151 } 0152 0153 if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) { 0154 break; 0155 } 0156 0157 // guard against endless loops 0158 ++endlessLoopingCounter; 0159 if (endlessLoopingCounter > 1024) { 0160 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line."; 0161 break; 0162 } 0163 } 0164 auto context = stateData->topContext(); 0165 applyFormat(0, 0, context->attributeFormat()); 0166 return *defData->unify.insert(newState); 0167 } 0168 0169 auto &dynamicRegexpCache = RepositoryPrivate::get(defData->repo)->m_dynamicRegexpCache; 0170 0171 int offset = 0; 0172 int beginOffset = 0; 0173 bool lineContinuation = false; 0174 0175 /** 0176 * for expensive rules like regexes we do: 0177 * - match them for the complete line, as this is faster than re-trying them at all positions 0178 * - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use 0179 * - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed 0180 */ 0181 QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets; 0182 QStringList capturesForLastDynamicSkipOffset; 0183 0184 auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int { 0185 auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) { 0186 return v.first == r; 0187 }); 0188 if (i == skipOffsets.end()) 0189 return 0; 0190 return i->second; 0191 }; 0192 0193 auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) { 0194 auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) { 0195 return v.first == r; 0196 }); 0197 if (it == skipOffsets.end()) { 0198 skipOffsets.push_back({r, i}); 0199 } else { 0200 it->second = i; 0201 } 0202 }; 0203 0204 /** 0205 * current active format 0206 * stored as pointer to avoid deconstruction/constructions inside the internal loop 0207 * the pointers are stable, the formats are either in the contexts or rules 0208 */ 0209 auto currentFormat = &stateData->topContext()->attributeFormat(); 0210 0211 /** 0212 * cached first non-space character, needs to be computed if < 0 0213 */ 0214 int firstNonSpace = -1; 0215 int lastOffset = offset; 0216 int endlessLoopingCounter = 0; 0217 do { 0218 /** 0219 * avoid that we loop endless for some broken hl definitions 0220 */ 0221 if (lastOffset == offset) { 0222 ++endlessLoopingCounter; 0223 if (endlessLoopingCounter > 1024) { 0224 qCDebug(Log) << "Endless state transitions, aborting highlighting of line."; 0225 break; 0226 } 0227 } else { 0228 // ensure we made progress, clear the endlessLoopingCounter 0229 Q_ASSERT(offset > lastOffset); 0230 lastOffset = offset; 0231 endlessLoopingCounter = 0; 0232 } 0233 0234 /** 0235 * try to match all rules in the context in order of declaration in XML 0236 */ 0237 bool isLookAhead = false; 0238 int newOffset = 0; 0239 const Format *newFormat = nullptr; 0240 for (const auto &ruleShared : stateData->topContext()->rules()) { 0241 auto rule = ruleShared.get(); 0242 /** 0243 * filter out rules that require a specific column 0244 */ 0245 if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) { 0246 continue; 0247 } 0248 0249 /** 0250 * filter out rules that only match for leading whitespace 0251 */ 0252 if (rule->firstNonSpace()) { 0253 /** 0254 * compute the first non-space lazy 0255 * avoids computing it for contexts without any such rules 0256 */ 0257 if (firstNonSpace < 0) { 0258 firstNonSpace = firstNonSpaceChar(text); 0259 } 0260 0261 /** 0262 * can we skip? 0263 */ 0264 if (offset > firstNonSpace) { 0265 continue; 0266 } 0267 } 0268 0269 int currentSkipOffset = 0; 0270 if (Q_UNLIKELY(rule->hasSkipOffset())) { 0271 /** 0272 * shall we skip application of this rule? two cases: 0273 * - rule can't match at all => currentSkipOffset < 0 0274 * - rule will only match for some higher offset => currentSkipOffset > offset 0275 * 0276 * we need to invalidate this if we are dynamic and have different captures then last time 0277 */ 0278 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) { 0279 skipOffsets.clear(); 0280 } else { 0281 currentSkipOffset = getSkipOffsetValue(rule); 0282 if (currentSkipOffset < 0 || currentSkipOffset > offset) { 0283 continue; 0284 } 0285 } 0286 } 0287 0288 auto newResult = rule->doMatch(text, offset, stateData->topCaptures(), dynamicRegexpCache); 0289 newOffset = newResult.offset(); 0290 0291 /** 0292 * update skip offset if new one rules out any later match or is larger than current one 0293 */ 0294 if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) { 0295 insertSkipOffset(rule, newResult.skipOffset()); 0296 0297 // remember new captures, if dynamic to enforce proper reset above on change! 0298 if (rule->isDynamic()) { 0299 capturesForLastDynamicSkipOffset = stateData->topCaptures(); 0300 } 0301 } 0302 0303 if (newOffset <= offset) { 0304 continue; 0305 } 0306 0307 /** 0308 * apply folding. 0309 * special cases: 0310 * - rule with endRegion + beginRegion: in endRegion, the length is 0 0311 * - rule with lookAhead: length is 0 0312 */ 0313 if (rule->endRegion().isValid() && rule->beginRegion().isValid()) { 0314 applyFolding(offset, 0, rule->endRegion()); 0315 } else if (rule->endRegion().isValid()) { 0316 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion()); 0317 } 0318 if (rule->beginRegion().isValid()) { 0319 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion()); 0320 } 0321 0322 if (rule->isLookAhead()) { 0323 Q_ASSERT(!rule->context().isStay()); 0324 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData); 0325 isLookAhead = true; 0326 break; 0327 } 0328 0329 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData); 0330 newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat(); 0331 if (newOffset == text.size() && rule->isLineContinue()) { 0332 lineContinuation = true; 0333 } 0334 break; 0335 } 0336 if (isLookAhead) { 0337 continue; 0338 } 0339 0340 if (newOffset <= offset) { // no matching rule 0341 if (stateData->topContext()->fallthrough()) { 0342 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData); 0343 continue; 0344 } 0345 0346 newOffset = offset + 1; 0347 newFormat = &stateData->topContext()->attributeFormat(); 0348 } 0349 0350 /** 0351 * if we arrive here, some new format has to be set! 0352 */ 0353 Q_ASSERT(newFormat); 0354 0355 /** 0356 * on format change, apply the last one and switch to new one 0357 */ 0358 if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) { 0359 if (offset > 0) { 0360 applyFormat(beginOffset, offset - beginOffset, *currentFormat); 0361 } 0362 beginOffset = offset; 0363 currentFormat = newFormat; 0364 } 0365 0366 /** 0367 * we must have made progress if we arrive here! 0368 */ 0369 Q_ASSERT(newOffset > offset); 0370 offset = newOffset; 0371 0372 } while (offset < text.size()); 0373 0374 /** 0375 * apply format for remaining text, if any 0376 */ 0377 if (beginOffset < offset) { 0378 applyFormat(beginOffset, text.size() - beginOffset, *currentFormat); 0379 } 0380 0381 /** 0382 * handle line end context switches 0383 * guard against endless loops 0384 * see https://phabricator.kde.org/D18509 0385 */ 0386 { 0387 int endlessLoopingCounter = 0; 0388 while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) { 0389 if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) { 0390 break; 0391 } 0392 0393 // guard against endless loops 0394 ++endlessLoopingCounter; 0395 if (endlessLoopingCounter > 1024) { 0396 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line."; 0397 break; 0398 } 0399 } 0400 } 0401 0402 return *defData->unify.insert(newState); 0403 } 0404 0405 bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData) 0406 { 0407 const auto popCount = contextSwitch.popCount(); 0408 const auto context = contextSwitch.context(); 0409 if (popCount <= 0 && !context) { 0410 return true; 0411 } 0412 0413 // a modified state must be detached before modification 0414 if (isSharedData) { 0415 data = StateData::detach(state); 0416 isSharedData = false; 0417 } 0418 0419 // kill as many items as requested from the stack, will always keep the initial context alive! 0420 const bool initialContextSurvived = data->pop(popCount); 0421 0422 // if we have a new context to add, push it 0423 // then we always "succeed" 0424 if (context) { 0425 data->push(context, std::move(captures)); 0426 return true; 0427 } 0428 0429 // else we abort, if we did try to pop the initial context 0430 return initialContextSurvived; 0431 } 0432 0433 void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region) 0434 { 0435 Q_UNUSED(offset); 0436 Q_UNUSED(length); 0437 Q_UNUSED(region); 0438 }