File indexing completed on 2024-05-12 15:50:01

0001 /*
0002     SPDX-FileCopyrightText: 2016 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: MIT
0005 */
0006 
0007 #include "abstracthighlighter.h"
0008 #include "abstracthighlighter_p.h"
0009 #include "context_p.h"
0010 #include "definition_p.h"
0011 #include "foldingregion.h"
0012 #include "format.h"
0013 #include "ksyntaxhighlighting_logging.h"
0014 #include "repository.h"
0015 #include "rule_p.h"
0016 #include "state.h"
0017 #include "state_p.h"
0018 #include "theme.h"
0019 
0020 using namespace KSyntaxHighlighting;
0021 
0022 AbstractHighlighterPrivate::AbstractHighlighterPrivate()
0023 {
0024 }
0025 
0026 AbstractHighlighterPrivate::~AbstractHighlighterPrivate()
0027 {
0028 }
0029 
0030 void AbstractHighlighterPrivate::ensureDefinitionLoaded()
0031 {
0032     auto defData = DefinitionData::get(m_definition);
0033     if (Q_UNLIKELY(!m_definition.isValid() && defData->repo && !m_definition.name().isEmpty())) {
0034         qCDebug(Log) << "Definition became invalid, trying re-lookup.";
0035         m_definition = defData->repo->definitionForName(m_definition.name());
0036         defData = DefinitionData::get(m_definition);
0037     }
0038 
0039     if (Q_UNLIKELY(!defData->repo && !defData->fileName.isEmpty())) {
0040         qCCritical(Log) << "Repository got deleted while a highlighter is still active!";
0041     }
0042 
0043     if (m_definition.isValid()) {
0044         defData->load();
0045     }
0046 }
0047 
0048 AbstractHighlighter::AbstractHighlighter()
0049     : d_ptr(new AbstractHighlighterPrivate)
0050 {
0051 }
0052 
0053 AbstractHighlighter::AbstractHighlighter(AbstractHighlighterPrivate *dd)
0054     : d_ptr(dd)
0055 {
0056 }
0057 
0058 AbstractHighlighter::~AbstractHighlighter()
0059 {
0060     delete d_ptr;
0061 }
0062 
0063 Definition AbstractHighlighter::definition() const
0064 {
0065     return d_ptr->m_definition;
0066 }
0067 
0068 void AbstractHighlighter::setDefinition(const Definition &def)
0069 {
0070     Q_D(AbstractHighlighter);
0071     d->m_definition = def;
0072 }
0073 
0074 Theme AbstractHighlighter::theme() const
0075 {
0076     Q_D(const AbstractHighlighter);
0077     return d->m_theme;
0078 }
0079 
0080 void AbstractHighlighter::setTheme(const Theme &theme)
0081 {
0082     Q_D(AbstractHighlighter);
0083     d->m_theme = theme;
0084 }
0085 
0086 /**
0087  * Returns the index of the first non-space character. If the line is empty,
0088  * or only contains white spaces, text.size() is returned.
0089  */
0090 static inline int firstNonSpaceChar(QStringView text)
0091 {
0092     for (int i = 0; i < text.length(); ++i) {
0093         if (!text[i].isSpace()) {
0094             return i;
0095         }
0096     }
0097     return text.size();
0098 }
0099 
0100 #if KSYNTAXHIGHLIGHTING_BUILD_DEPRECATED_SINCE(5, 87)
0101 State AbstractHighlighter::highlightLine(const QString &text, const State &state)
0102 {
0103     return highlightLine(QStringView(text), state);
0104 }
0105 #endif
0106 
/**
 * Highlights a single line of text, starting from the given state, and
 * returns the state that results after the line.
 *
 * While the line is processed, formats are reported through applyFormat()
 * and folding regions through applyFolding().
 *
 * Overview of the steps below:
 *   - make sure the definition is loaded; without a usable definition the whole
 *     line is reported with a default constructed Format and a default State is returned
 *   - reset the state if it belongs to a different definition, initialize it if empty
 *   - empty line: follow line empty / line end context switches and return
 *   - non-empty line: repeatedly try the rules of the top context at the current offset
 *   - finally follow the line end context switches, unless a line continuation was seen
 *
 * @param text  the line to highlight
 * @param state the state to start from (copied into a local before use)
 * @return the state after processing @p text
 */
0107 State AbstractHighlighter::highlightLine(QStringView text, const State &state)
0108 {
0109     Q_D(AbstractHighlighter);
0110
0111     // verify definition, deal with no highlighting being enabled
0112     d->ensureDefinitionLoaded();
0113     const auto defData = DefinitionData::get(d->m_definition);
0114     if (!d->m_definition.isValid() || !defData->isLoaded()) {
         // no usable definition: one default format for the full line, fresh state
0115         applyFormat(0, text.size(), Format());
0116         return State();
0117     }
0118
0119     // verify/initialize state
         // all changes below go through stateData, which is obtained from the local copy newState
0120     auto newState = state;
0121     auto stateData = StateData::get(newState);
0122     const auto definitionId = DefinitionData::get(d->m_definition)->id;
0123     if (!stateData->isEmpty() && stateData->m_defId != definitionId) {
0124         qCDebug(Log) << "Got invalid state, resetting.";
0125         stateData->clear();
0126     }
0127     if (stateData->isEmpty()) {
0128         stateData->push(defData->initialContext(), QStringList());
0129         stateData->m_defId = definitionId;
0130     }
0131
0132     // process empty lines
0133     if (text.isEmpty()) {
0134         /**
0135          * handle line empty context switches
0136          * guard against endless loops
0137          * see https://phabricator.kde.org/D18509
0138          */
0139         int endlessLoopingCounter = 0;
0140         while (!stateData->topContext()->lineEmptyContext().isStay() || !stateData->topContext()->lineEndContext().isStay()) {
0141             /**
0142              * line empty context switches
0143              */
0144             if (!stateData->topContext()->lineEmptyContext().isStay()) {
0145                 if (!d->switchContext(stateData, stateData->topContext()->lineEmptyContext(), QStringList())) {
0146                     /**
0147                      * end when trying to #pop the main context
0148                      */
0149                     break;
0150                 }
0151                 /**
0152                  * line end context switches only when lineEmptyContext is #stay. This avoids
0153                  * skipping empty lines after a line continuation character (see bug 405903)
0154                  */
0155             } else if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList())) {
0156                 break;
0157             }
0158
0159             // guard against endless loops
0160             ++endlessLoopingCounter;
0161             if (endlessLoopingCounter > 1024) {
0162                 qCDebug(Log) << "Endless switch context transitions for line empty context, aborting highlighting of line.";
0163                 break;
0164             }
0165         }
             // report a zero-length format using the attribute format of the context we ended up in
0166         auto context = stateData->topContext();
0167         applyFormat(0, 0, context->attributeFormat());
0168         return newState;
0169     }
0170
         // offset: current position in text
         // beginOffset: start of the run that still has to be reported with currentFormat
         // lineContinuation: set when a line-continue rule matched up to the end of the line,
         //                   this suppresses the line end context switches at the bottom
0171     int offset = 0;
0172     int beginOffset = 0;
0173     bool lineContinuation = false;
0174
0175     /**
0176      * for expensive rules like regexes we do:
0177      *   - match them for the complete line, as this is faster than re-trying them at all positions
0178      *   - store the result of the first position that matches (or -1 for no match in the full line) in the skipOffsets hash for re-use
0179      *   - have capturesForLastDynamicSkipOffset as guard for dynamic regexes to invalidate the cache if they might have changed
0180      */
0181     QVarLengthArray<QPair<Rule *, int>, 8> skipOffsets;
0182     QStringList capturesForLastDynamicSkipOffset;
0183
         // cached skip offset for the rule, 0 if nothing is cached (0 never causes a skip in the loop below)
0184     auto getSkipOffsetValue = [&skipOffsets](Rule *r) -> int {
0185         auto i = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
0186             return v.first == r;
0187         });
0188         if (i == skipOffsets.end())
0189             return 0;
0190         return i->second;
0191     };
0192
         // store the skip offset for the rule, overwriting an already cached value
0193     auto insertSkipOffset = [&skipOffsets](Rule *r, int i) {
0194         auto it = std::find_if(skipOffsets.begin(), skipOffsets.end(), [r](const auto &v) {
0195             return v.first == r;
0196         });
0197         if (it == skipOffsets.end()) {
0198             skipOffsets.push_back({r, i});
0199         } else {
0200             it->second = i;
0201         }
0202     };
0203
0204     /**
0205      * current active format
0206      * stored as pointer to avoid deconstruction/constructions inside the internal loop
0207      * the pointers are stable, the formats are either in the contexts or rules
0208      */
0209     auto currentFormat = &stateData->topContext()->attributeFormat();
0210
0211     /**
0212      * cached first non-space character, needs to be computed if < 0
0213      */
0214     int firstNonSpace = -1;
         // lastOffset/endlessLoopingCounter: count consecutive iterations that did not advance offset
0215     int lastOffset = offset;
0216     int endlessLoopingCounter = 0;
0217     do {
0218         /**
0219          * avoid that we loop endless for some broken hl definitions
0220          */
0221         if (lastOffset == offset) {
0222             ++endlessLoopingCounter;
0223             if (endlessLoopingCounter > 1024) {
0224                 qCDebug(Log) << "Endless state transitions, aborting highlighting of line.";
0225                 break;
0226             }
0227         } else {
0228             // ensure we made progress, clear the endlessLoopingCounter
0229             Q_ASSERT(offset > lastOffset);
0230             lastOffset = offset;
0231             endlessLoopingCounter = 0;
0232         }
0233
0234         /**
0235          * try to match all rules in the context in order of declaration in XML
0236          */
             // isLookAhead: a lookAhead rule matched and switched the context without consuming text
             // newOffset: offset returned by the last rule that was actually tried
             // newFormat: format for the text consumed in this iteration, set on a match or by the fallback below
0237         bool isLookAhead = false;
0238         int newOffset = 0;
0239         const Format *newFormat = nullptr;
0240         for (const auto &rule : stateData->topContext()->rules()) {
0241             /**
0242              * filter out rules that require a specific column
0243              */
0244             if ((rule->requiredColumn() >= 0) && (rule->requiredColumn() != offset)) {
0245                 continue;
0246             }
0247
0248             /**
0249              * filter out rules that only match for leading whitespace
0250              */
0251             if (rule->firstNonSpace()) {
0252                 /**
0253                  * compute the first non-space lazy
0254                  * avoids computing it for contexts without any such rules
0255                  */
0256                 if (firstNonSpace < 0) {
0257                     firstNonSpace = firstNonSpaceChar(text);
0258                 }
0259
0260                 /**
0261                  * can we skip?
0262                  */
0263                 if (offset > firstNonSpace) {
0264                     continue;
0265                 }
0266             }
0267
0268             /**
0269              * shall we skip application of this rule? two cases:
0270              *   - rule can't match at all => currentSkipOffset < 0
0271              *   - rule will only match for some higher offset => currentSkipOffset > offset
0272              *
0273              * we need to invalidate this if we are dynamic and have different captures then last time
0274              */
0275             if (rule->isDynamic() && (capturesForLastDynamicSkipOffset != stateData->topCaptures())) {
0276                 skipOffsets.clear();
0277             }
0278             const auto currentSkipOffset = getSkipOffsetValue(rule.get());
0279             if (currentSkipOffset < 0 || currentSkipOffset > offset) {
0280                 continue;
0281             }
0282
                 // run the actual match at the current offset, with the captures of the top context
0283             const auto newResult = rule->doMatch(text, offset, stateData->topCaptures());
0284             newOffset = newResult.offset();
0285
0286             /**
0287              * update skip offset if new one rules out any later match or is larger than current one
0288              */
0289             if (newResult.skipOffset() < 0 || newResult.skipOffset() > currentSkipOffset) {
0290                 insertSkipOffset(rule.get(), newResult.skipOffset());
0291
0292                 // remember new captures, if dynamic to enforce proper reset above on change!
0293                 if (rule->isDynamic()) {
0294                     capturesForLastDynamicSkipOffset = stateData->topCaptures();
0295                 }
0296             }
0297
                 // the rule did not consume anything at this offset, try the next one
0298             if (newOffset <= offset) {
0299                 continue;
0300             }
0301
0302             /**
0303              * apply folding.
0304              * special cases:
0305              *   - rule with endRegion + beginRegion: in endRegion, the length is 0
0306              *   - rule with lookAhead: length is 0
0307              */
0308             if (rule->endRegion().isValid() && rule->beginRegion().isValid()) {
0309                 applyFolding(offset, 0, rule->endRegion());
0310             } else if (rule->endRegion().isValid()) {
0311                 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->endRegion());
0312             }
0313             if (rule->beginRegion().isValid()) {
0314                 applyFolding(offset, rule->isLookAhead() ? 0 : newOffset - offset, rule->beginRegion());
0315             }
0316
                 // lookAhead rules only switch the context; offset stays unchanged,
                 // so the same text is tried again with the rules of the new top context
0317             if (rule->isLookAhead()) {
0318                 Q_ASSERT(!rule->context().isStay());
0319                 d->switchContext(stateData, rule->context(), newResult.captures());
0320                 isLookAhead = true;
0321                 break;
0322             }
0323
                 // regular match: the context is switched first, so a rule without a valid
                 // attribute format uses the attribute format of the new top context
0324             d->switchContext(stateData, rule->context(), newResult.captures());
0325             newFormat = rule->attributeFormat().isValid() ? &rule->attributeFormat() : &stateData->topContext()->attributeFormat();
0326             if (newOffset == text.size() && rule->isLineContinue()) {
0327                 lineContinuation = true;
0328             }
0329             break;
0330         }
             // lookAhead consumed nothing: go on with the next iteration at the same offset
0331         if (isLookAhead) {
0332             continue;
0333         }
0334
0335         if (newOffset <= offset) { // no matching rule
                 // either follow the fallthrough context without consuming anything ...
0336             if (stateData->topContext()->fallthrough()) {
0337                 d->switchContext(stateData, stateData->topContext()->fallthroughContext(), QStringList());
0338                 continue;
0339             }
0340
                 // ... or consume exactly one character with the attribute format of the context
0341             newOffset = offset + 1;
0342             newFormat = &stateData->topContext()->attributeFormat();
0343         }
0344
0345         /**
0346          * if we arrive here, some new format has to be set!
0347          */
0348         Q_ASSERT(newFormat);
0349
0350         /**
0351          * on format change, apply the last one and switch to new one
0352          */
             // a change requires both a different pointer and a different id,
             // so distinct Format objects that share an id continue the current run
0353         if (newFormat != currentFormat && newFormat->id() != currentFormat->id()) {
                 // at offset 0 nothing has been consumed yet, there is no previous run to report
0354             if (offset > 0) {
0355                 applyFormat(beginOffset, offset - beginOffset, *currentFormat);
0356             }
0357             beginOffset = offset;
0358             currentFormat = newFormat;
0359         }
0360
0361         /**
0362          * we must have made progress if we arrive here!
0363          */
0364         Q_ASSERT(newOffset > offset);
0365         offset = newOffset;
0366
0367     } while (offset < text.size());
0368
0369     /**
0370      * apply format for remaining text, if any
0371      */
         // the reported length always extends to the end of the line,
         // also when the loop above was left early by the endless loop guard
0372     if (beginOffset < offset) {
0373         applyFormat(beginOffset, text.size() - beginOffset, *currentFormat);
0374     }
0375
0376     /**
0377      * handle line end context switches
0378      * guard against endless loops
0379      * see https://phabricator.kde.org/D18509
0380      */
0381     {
0382         int endlessLoopingCounter = 0;
             // skipped completely if a line-continue rule matched up to the end of the line
0383         while (!stateData->topContext()->lineEndContext().isStay() && !lineContinuation) {
0384             if (!d->switchContext(stateData, stateData->topContext()->lineEndContext(), QStringList())) {
0385                 break;
0386             }
0387
0388             // guard against endless loops
0389             ++endlessLoopingCounter;
0390             if (endlessLoopingCounter > 1024) {
0391                 qCDebug(Log) << "Endless switch context transitions for line end context, aborting highlighting of line.";
0392                 break;
0393             }
0394         }
0395     }
0396
0397     return newState;
0398 }
0399 
0400 bool AbstractHighlighterPrivate::switchContext(StateData *data, const ContextSwitch &contextSwitch, const QStringList &captures)
0401 {
0402     // kill as many items as requested from the stack, will always keep the initial context alive!
0403     const bool initialContextSurvived = data->pop(contextSwitch.popCount());
0404 
0405     // if we have a new context to add, push it
0406     // then we always "succeed"
0407     if (contextSwitch.context()) {
0408         data->push(contextSwitch.context(), captures);
0409         return true;
0410     }
0411 
0412     // else we abort, if we did try to pop the initial context
0413     return initialContextSurvived;
0414 }
0415 
0416 void AbstractHighlighter::applyFolding(int offset, int length, FoldingRegion region)
0417 {
0418     Q_UNUSED(offset);
0419     Q_UNUSED(length);
0420     Q_UNUSED(region);
0421 }