File indexing completed on 2024-05-12 04:02:16

0001 /*
0002     SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "abstracthighlighter.h"
0008 #include "abstracthighlighter_p.h"
0009 #include "context_p.h"
0010 #include "definition_p.h"
0011 #include "foldingregion.h"
0012 #include "format.h"
0013 #include "ksyntaxhighlighting_logging.h"
0014 #include "repository.h"
0015 #include "repository_p.h"
0016 #include "rule_p.h"
0017 #include "state.h"
0018 #include "state_p.h"
0019 #include "theme.h"
0020 
0021 using namespace KSyntaxHighlighting;
0022 
0023 AbstractHighlighterPrivate::AbstractHighlighterPrivate()
0024 {
0025 }
0026 
0027 AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
0028 {
0029 }
0030 
0031 void AbstractHighlighterPrivate::ensureDefinitionLoaded()
0032 {
0033     auto defData = DefinitionData::get(m_definition);
0034     if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) {
0035         qCDebug(Log) << "Definition became invalid, trying re-lookup.";
0036         m_definition = defData->repo->definitionForName(m_definition.name());
0037         defData = DefinitionData::get(m_definition);
0038     }
0039 
0040     if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
0041         qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
0042     }
0043 
0044     if (m_definition.isValid()) {
0045         defData->load();
0046     }
0047 }
0048 
0049 AbstractHighlighter::AbstractHighlighter()
0050     : d_ptr(new AbstractHighlighterPrivate)
0051 {
0052 }
0053 
0054 AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
0055     : d_ptr(dd)
0056 {
0057 }
0058 
0059 AbstractHighlighter::~AbstractHighlighter()
0060 {
0061     delete d_ptr;
0062 }
0063 
0064 Definition AbstractHighlighter::definition() const
0065 {
0066     return d_ptr->m_definition;
0067 }
0068 
0069 void AbstractHighlighter::setDefinition(const Definition &def)
0070 {
0071     Q_D(AbstractHighlighter);
0072     d->m_definition = def;
0073 }
0074 
0075 Theme AbstractHighlighter::theme() const
0076 {
0077     Q_D(const AbstractHighlighter);
0078     return d->m_theme;
0079 }
0080 
0081 void AbstractHighlighter::setTheme(const Theme &theme)
0082 {
0083     Q_D(AbstractHighlighter);
0084     d->m_theme = theme;
0085 }
0086 
0087 /**
0088  * Returns the index of the first non-space character. If the line is empty,
0089  * or only contains white spaces, text.size() is returned.
0090  */
0091 static inline int firstNonSpaceChar(QStringView text)
0092 {
0093     for (int i = 0; i < text.length(); ++i) {
0094         if (!text[i].isSpace()) {
0095             return i;
0096         }
0097     }
0098     return text.size();
0099 }
0100 
/**
 * Highlights one line of text.
 *
 * Starting from @p state, the rules of the current top context are tried in
 * order at the current offset of @p text. Runs of text sharing a format are
 * reported through applyFormat(), folding regions of matching rules are
 * reported through applyFolding().
 *
 * If the definition is invalid or not loaded, the whole line is reported with
 * a default-constructed Format and a default-constructed State is returned.
 * A @p state without data, or whose definition id differs from the current
 * definition, is replaced by a fresh state holding the initial context.
 *
 * @param text the line to highlight
 * @param state the state to start from
 * @return the state after the line, as returned by inserting it into the
 *         definition's unification cache (defData->unify)
 */
0101 State AbstractHighlighter::highlightLine(QStringView text, const State &state)
0102 {
0103     Q_D(AbstractHighlighter);
0104 
0105     // verify definition, deal with no highlighting being enabled
0106     d->ensureDefinitionLoaded();
0107     const auto defData = DefinitionData::get(d->m_definition);
0108     if (!d->m_definition.isValid() || !defData->isLoaded()) {
0109         applyFormat(0, text.size(), Format());
0110         return State();
0111     }
0112 
0113     // limit the cache for unification to some reasonable size
0114     // we use here at the moment 64k elements to not hog too much memory
0115     // and to make the clearing no big stall
0116     if (defData->unify.size() > 64 * 1024)
0117         defData->unify.clear();
0118 
0119     // verify/initialize state
0120     auto newState = state;
0121     auto stateData = StateData::get(newState);
         // true until the state data got reset below or detached inside switchContext(),
         // switchContext() calls StateData::detach() before modifying while this is set
0122     bool isSharedData = true;
0123     if (Q_UNLIKELY(stateData && stateData->m_defId != defData->id)) {
0124         qCDebug(Log) << "Got invalid state, resetting.";
0125         stateData = nullptr;
0126     }
0127     if (Q_UNLIKELY(!stateData)) {
0128         stateData = StateData::reset(newState);
0129         stateData->push(defData->initialContext(), QStringList());
0130         stateData->m_defId = defData->id;
0131         isSharedData = false;
0132     }
0133 
0134     // process empty lines
0135     if (Q_UNLIKELY(text.isEmpty())) {
0136         /**
0137          * handle line empty context switches
0138          * guard against endless loops
0139          * see https://phabricator.kde.org/D18509
0140          */
0141         int endlessLoopingCounter = 0;
0142         while (!stateData->topContext()->lineEmptyContext().isStay()) {
0143             /**
0144              * line empty context switches
0145              */
0146             if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList(), newState, isSharedData)) {
0147                 /**
0148                  * end when trying to #pop the main context
0149                  */
0150                 break;
0151             }
0152 
0153             if (stateData->topContext()->stopEmptyLineContextSwitchLoop()) {
0154                 break;
0155             }
0156 
0157             // guard against endless loops
0158             ++endlessLoopingCounter;
0159             if (endlessLoopingCounter > 1024) {
0160                 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
0161                 break;
0162             }
0163         }
0164         auto context = stateData->topContext();
0165         applyFormat(0, 0, context->attributeFormat());
0166         return *defData->unify.insert(newState);
0167     }
0168 
0169     auto &dynamicRegexpCache = RepositoryPrivate::get(defData->repo)->m_dynamicRegexpCache;
0170 
         // offset: current position in the line, beginOffset: start of the not yet reported run of currentFormat
0171     int offset = 0;
0172     int beginOffset = 0;
         // set when a line continue rule matched up to the end of the line, suppresses the line end context switches
0173     bool lineContinuation = false;
0174 
0175     /**
0176      * for expensive rules like regexes we do:
0177      *   - match them for the complete line, as this is faster than re-trying them at all positions
0178      *   - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
0179      *   - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
0180      */
0181     QVarLengthArray<QPair<const Rule *, int>, 8> skipOffsets;
0182     QStringList capturesForLastDynamicSkipOffset;
0183 
         // linear lookup of the stored skip offset of a rule, 0 if nothing is stored for it
         // (with 0 the rule is not skipped by the check in the rule loop)
0184     auto getSkipOffsetValue = [&skipOffsets](const Rule *r) -> int {
0185         auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
0186             return v.first == r;
0187         });
0188         if (i == skipOffsets.end())
0189             return 0;
0190         return i->second;
0191     };
0192 
         // stores the skip offset for a rule, overwriting an already existing entry
0193     auto insertSkipOffset = [&skipOffsets](const Rule *r, int i) {
0194         auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
0195             return v.first == r;
0196         });
0197         if (it == skipOffsets.end()) {
0198             skipOffsets.push_back({r, i});
0199         } else {
0200             it->second = i;
0201         }
0202     };
0203 
0204     /**
0205      * current active format
0206      * stored as pointer to avoid deconstruction/constructions inside the internal loop
0207      * the pointers are stable, the formats are either in the contexts or rules
0208      */
0209     auto currentFormat = &stateData->topContext()->attributeFormat();
0210 
0211     /**
0212      * cached first non-space character, needs to be computed if < 0
0213      */
0214     int firstNonSpace = -1;
0215     int lastOffset = offset;
0216     int endlessLoopingCounter = 0;
0217     do {
0218         /**
0219          * avoid that we loop endless for some broken hl definitions
0220          */
0221         if (lastOffset == offset) {
0222             ++endlessLoopingCounter;
0223             if (endlessLoopingCounter > 1024) {
0224                 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
0225                 break;
0226             }
0227         } else {
0228             // ensure we made progress, clear the endlessLoopingCounter
0229             Q_ASSERT(offset > lastOffset);
0230             lastOffset = offset;
0231             endlessLoopingCounter = 0;
0232         }
0233 
0234         /**
0235          * try to match all rules in the context in order of declaration in XML
0236          */
0237         bool isLookAhead = false;
0238         int newOffset = 0;
0239         const Format *newFormat = nullptr;
0240         for (const auto &ruleShared : stateData->topContext()->rules()) {
0241             auto rule = ruleShared.get();
0242             /**
0243              * filter out rules that require a specific column
0244              */
0245             if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
0246                 continue;
0247             }
0248 
0249             /**
0250              * filter out rules that only match for leading whitespace
0251              */
0252             if (rule->firstNonSpace()) {
0253                 /**
0254                  * compute the first non-space lazy
0255                  * avoids computing it for contexts without any such rules
0256                  */
0257                 if (firstNonSpace < 0) {
0258                     firstNonSpace = firstNonSpaceChar(text);
0259                 }
0260 
0261                 /**
0262                  * can we skip?
0263                  */
0264                 if (offset > firstNonSpace) {
0265                     continue;
0266                 }
0267             }
0268 
0269             int currentSkipOffset = 0;
0270             if (Q_UNLIKELY(rule->hasSkipOffset())) {
0271                 /**
0272                  * shall we skip application of this rule? two cases:
0273                  *   - rule can't match at all => currentSkipOffset < 0
0274                  *   - rule will only match for some higher offset => currentSkipOffset > offset
0275                  *
0276                  * we need to invalidate this if we are dynamic and have different captures then last time
0277                  */
0278                 if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
0279                     skipOffsets.clear();
0280                 } else {
0281                     currentSkipOffset = getSkipOffsetValue(rule);
0282                     if (currentSkipOffset < 0 || currentSkipOffset > offset) {
0283                         continue;
0284                     }
0285                 }
0286             }
0287 
0288             auto newResult = rule->doMatch(text, offset, stateData->topCaptures(), dynamicRegexpCache);
0289             newOffset = newResult.offset();
0290 
0291             /**
0292              * update skip offset if new one rules out any later match or is larger than current one
0293              */
0294             if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
0295                 insertSkipOffset(rule, newResult.skipOffset());
0296 
0297                 // remember new captures, if dynamic to enforce proper reset above on change!
0298                 if (rule->isDynamic()) {
0299                     capturesForLastDynamicSkipOffset = stateData->topCaptures();
0300                 }
0301             }
0302 
                 // a result offset that is not behind the current offset is treated as "no match", try the next rule
0303             if (newOffset <= offset) {
0304                 continue;
0305             }
0306 
0307             /**
0308              * apply folding.
0309              * special cases:
0310              *   - rule with endRegion + beginRegion: in endRegion, the length is 0
0311              *   - rule with lookAhead: length is 0
0312              */
0313             if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
0314                 applyFolding(offset, 0, rule->endRegion());
0315             } else if (rule->endRegion().isValid()) {
0316                 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion());
0317             }
0318             if (rule->beginRegion().isValid()) {
0319                 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion());
0320             }
0321 
0322             if (rule->isLookAhead()) {
0323                 Q_ASSERT(!rule->context().isStay());
0324                 d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
0325                 isLookAhead = true;
0326                 break;
0327             }
0328 
0329             d->switchContext(stateData, rule->context(), std::move(newResult.captures()), newState, isSharedData);
                 // the format is chosen after the context switch, so the fallback is the attribute format of the new top context
0330             newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
0331             if (newOffset == text.size() && rule->isLineContinue()) {
0332                 lineContinuation = true;
0333             }
0334             break;
0335         }
             // a look-ahead match only switched the context, offset is unchanged:
             // go on with the rules of the new top context at the same position
0336         if (isLookAhead) {
0337             continue;
0338         }
0339 
0340         if (newOffset <= offset) { // no matching rule
0341             if (stateData->topContext()->fallthrough()) {
0342                 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList(), newState, isSharedData);
0343                 continue;
0344             }
0345 
                 // no rule and no fallthrough: advance by one character using the attribute format of the context
0346             newOffset = offset + 1;
0347             newFormat = &stateData->topContext()->attributeFormat();
0348         }
0349 
0350         /**
0351          * if we arrive here, some new format has to be set!
0352          */
0353         Q_ASSERT(newFormat);
0354 
0355         /**
0356          * on format change, apply the last one and switch to new one
0357          */
0358         if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
0359             if (offset > 0) {
0360                 applyFormat(beginOffset, offset - beginOffset, *currentFormat);
0361             }
0362             beginOffset = offset;
0363             currentFormat = newFormat;
0364         }
0365 
0366         /**
0367          * we must have made progress if we arrive here!
0368          */
0369         Q_ASSERT(newOffset > offset);
0370         offset = newOffset;
0371 
0372     } while (offset < text.size());
0373 
0374     /**
0375      * apply format for remaining text, if any
0376      */
0377     if (beginOffset < offset) {
0378         applyFormat(beginOffset, text.size() - beginOffset, *currentFormat);
0379     }
0380 
0381     /**
0382      * handle line end context switches
0383      * guard against endless loops
0384      * see https://phabricator.kde.org/D18509
0385      */
0386     {
0387         int endlessLoopingCounter = 0;
0388         while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
0389             if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList(), newState, isSharedData)) {
0390                 break;
0391             }
0392 
0393             // guard against endless loops
0394             ++endlessLoopingCounter;
0395             if (endlessLoopingCounter > 1024) {
0396                 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
0397                 break;
0398             }
0399         }
0400     }
0401 
0402     return *defData->unify.insert(newState);
0403 }
0404 
0405 bool AbstractHighlighterPrivate::switchContext(StateData *&data, const ContextSwitch &contextSwitch, QStringList &&captures, State &state, bool &isSharedData)
0406 {
0407     const auto popCount = contextSwitch.popCount();
0408     const auto context = contextSwitch.context();
0409     if (popCount <= 0 && !context) {
0410         return true;
0411     }
0412 
0413     // a modified state must be detached before modification
0414     if (isSharedData) {
0415         data = StateData::detach(state);
0416         isSharedData = false;
0417     }
0418 
0419     // kill as many items as requested from the stack, will always keep the initial context alive!
0420     const bool initialContextSurvived = data->pop(popCount);
0421 
0422     // if we have a new context to add, push it
0423     // then we always "succeed"
0424     if (context) {
0425         data->push(context, std::move(captures));
0426         return true;
0427     }
0428 
0429     // else we abort, if we did try to pop the initial context
0430     return initialContextSurvived;
0431 }
0432 
0433 void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
0434 {
0435     Q_UNUSED(offset);
0436     Q_UNUSED(length);
0437     Q_UNUSED(region);
0438 }