File indexing completed on 2025-02-09 04:28:37
0001 /* 0002 This file is part of the KTextTemplate library 0003 0004 SPDX-FileCopyrightText: 2009, 2010, 2011 Stephen Kelly <steveire@gmail.com> 0005 0006 SPDX-License-Identifier: LGPL-2.1-or-later 0007 0008 */ 0009 0010 #include "lexer_p.h" 0011 0012 using namespace KTextTemplate; 0013 0014 using TextProcessingState = State<TextProcessingMachine::Type>; 0015 using TextProcessingTransition = TextProcessingMachine::Transition; 0016 0017 using ChurningState = LexerObject<TextProcessingState, NullTest, MarksClearer>; 0018 using FinalizeTokenState = LexerObject<TextProcessingState, NullTest, TokenFinalizer>; 0019 using EofHandler = LexerObject<TextProcessingTransition, NullTest, TokenFinalizer>; 0020 using EofHandlerWithTrimming = LexerObject<TextProcessingTransition, NullTest, TokenFinalizerWithTrimming>; 0021 0022 using MaybeTemplateSyntaxHandler = CharacterTransition<'{'>; 0023 0024 using TagStartHandler = CharacterTransition<'%', MarkStartSyntax>; 0025 using CommentStartHandler = CharacterTransition<'#', MarkStartSyntax>; 0026 using TagEndHandler = CharacterTransition<'%'>; 0027 using CommentEndHandler = CharacterTransition<'#'>; 0028 using BeginValueHandler = CharacterTransition<'{', MarkStartSyntax>; 0029 using MaybeEndValueHandler = CharacterTransition<'}'>; 0030 using NewlineHandler = CharacterTransition<'\n', MarkNewline>; 0031 using EndTemplateSyntaxHandler = CharacterTransition<'}', MarkEndSyntax>; 0032 using NotEndTemplateSyntaxHandler = NegateCharacterTransition<'}'>; 0033 0034 using NotBeginTemplateSyntaxHandler = LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, OrTest<CharacterTest<'#'>, CharacterTest<'%'>>>>>; 0035 0036 using NotBeginTemplateSyntaxOrNewlineHandler = 0037 LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, OrTest<CharacterTest<'#'>, OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>>; 0038 0039 using NotTagCommentOrNewlineHandler = 0040 LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'#'>, OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>; 0041 0042 using NonWhitespaceLineTextHandler = LexerObject<TextProcessingTransition, Negate<OrTest<IsSpace, CharacterTest<'{'>>>>; 0043 0044 using WhitespaceNonNewlineHandler = LexerObject<TextProcessingTransition, AndTest<Negate<CharacterTest<'\n'>>, IsSpace>>; 0045 0046 using FinalizingLineTextHandler = LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, IsSpace>>, TokenFinalizer>; 0047 0048 using SyntaxBoundaryNewlineHandler = CharacterTransition<'\n', TokenFinalizerWithTrimmingAndNewline>; 0049 using SyntaxBoundaryHandler = CharacterTransition<'{', FinalizeAndMarkStartSyntax>; 0050 0051 template<typename Transition> 0052 void addTransition(TextProcessingState *source, Lexer *lexer, TextProcessingState *target) 0053 { 0054 auto tr = new Transition(lexer, source); 0055 tr->setTargetState(target); 0056 } 0057 0058 TextProcessingMachine *createMachine(Lexer *lexer, Lexer::TrimType type) 0059 { 0060 auto machine = new TextProcessingMachine; 0061 0062 auto notFinished = new TextProcessingState(machine); 0063 auto finished = new TextProcessingState(machine); 0064 machine->setInitialState(notFinished); 0065 0066 auto processingText = new ChurningState(lexer, notFinished); 0067 auto processingPostNewline = new TextProcessingState(notFinished); 0068 auto processingBeginTemplateSyntax = new TextProcessingState(notFinished); 0069 auto processingTag = new TextProcessingState(notFinished); 0070 auto processingComment = new TextProcessingState(notFinished); 0071 auto processingValue = new TextProcessingState(notFinished); 0072 auto maybeProcessingValue = new TextProcessingState(notFinished); 0073 auto processingEndTag = new TextProcessingState(notFinished); 0074 auto processingEndComment = new TextProcessingState(notFinished); 0075 auto processingEndValue = new TextProcessingState(notFinished); 0076 TextProcessingState *processingPostTemplateSyntax; 0077 0078 if (type == Lexer::SmartTrim) 0079 processingPostTemplateSyntax = new TextProcessingState(notFinished); 0080 else 0081 processingPostTemplateSyntax = new FinalizeTokenState(lexer, notFinished); 0082 auto processingPostTemplateSyntaxWhitespace = new TextProcessingState(notFinished); 0083 0084 if (type == Lexer::SmartTrim) 0085 notFinished->setInitialState(processingPostNewline); 0086 else 0087 notFinished->setInitialState(processingText); 0088 0089 if (type == Lexer::SmartTrim) { 0090 addTransition<NewlineHandler>(processingText, lexer, processingPostNewline); 0091 0092 addTransition<NewlineHandler>(processingPostNewline, lexer, processingPostNewline); 0093 addTransition<MaybeTemplateSyntaxHandler>(processingPostNewline, lexer, processingBeginTemplateSyntax); 0094 addTransition<NonWhitespaceLineTextHandler>(processingPostNewline, lexer, processingText); 0095 } 0096 addTransition<MaybeTemplateSyntaxHandler>(processingText, lexer, processingBeginTemplateSyntax); 0097 0098 addTransition<TagStartHandler>(processingBeginTemplateSyntax, lexer, processingTag); 0099 addTransition<CommentStartHandler>(processingBeginTemplateSyntax, lexer, processingComment); 0100 addTransition<BeginValueHandler>(processingBeginTemplateSyntax, lexer, maybeProcessingValue); 0101 0102 if (type == Lexer::SmartTrim) { 0103 addTransition<NotBeginTemplateSyntaxOrNewlineHandler>(processingBeginTemplateSyntax, lexer, processingText); 0104 addTransition<NewlineHandler>(processingBeginTemplateSyntax, lexer, processingPostNewline); 0105 } else { 0106 addTransition<NotBeginTemplateSyntaxHandler>(processingBeginTemplateSyntax, lexer, processingText); 0107 } 0108 0109 addTransition<NewlineHandler>(processingTag, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText); 0110 addTransition<TagEndHandler>(processingTag, lexer, processingEndTag); 0111 0112 addTransition<NewlineHandler>(processingComment, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText); 0113 addTransition<CommentEndHandler>(processingComment, lexer, processingEndComment); 0114 0115 addTransition<TagStartHandler>(maybeProcessingValue, lexer, processingTag); 0116 addTransition<CommentStartHandler>(maybeProcessingValue, lexer, processingComment); 0117 addTransition<NotTagCommentOrNewlineHandler>(maybeProcessingValue, lexer, processingValue); 0118 addTransition<NewlineHandler>(maybeProcessingValue, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText); 0119 0120 addTransition<NewlineHandler>(processingValue, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText); 0121 addTransition<MaybeEndValueHandler>(processingValue, lexer, processingEndValue); 0122 0123 addTransition<NewlineHandler>(processingEndTag, lexer, processingPostNewline); 0124 addTransition<NotEndTemplateSyntaxHandler>(processingEndTag, lexer, processingTag); 0125 addTransition<EndTemplateSyntaxHandler>(processingEndTag, lexer, processingPostTemplateSyntax); 0126 0127 addTransition<NewlineHandler>(processingEndComment, lexer, processingPostNewline); 0128 addTransition<NotEndTemplateSyntaxHandler>(processingEndComment, lexer, processingComment); 0129 addTransition<EndTemplateSyntaxHandler>(processingEndComment, lexer, processingPostTemplateSyntax); 0130 0131 addTransition<NewlineHandler>(processingEndValue, lexer, processingPostNewline); 0132 addTransition<NotEndTemplateSyntaxHandler>(processingEndValue, lexer, processingValue); 0133 addTransition<EndTemplateSyntaxHandler>(processingEndValue, lexer, processingPostTemplateSyntax); 0134 0135 if (type != Lexer::SmartTrim) { 0136 processingPostTemplateSyntax->setUnconditionalTransition(processingText); 0137 } else { 0138 addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntax, lexer, processingPostNewline); 0139 addTransition<WhitespaceNonNewlineHandler>(processingPostTemplateSyntax, lexer, processingPostTemplateSyntaxWhitespace); 0140 addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntax, lexer, processingText); 0141 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntax, lexer, processingBeginTemplateSyntax); 0142 0143 // NOTE: We only have to transition to this if there was whitespace 0144 // before the opening tag. Maybe store that in an external state property? 0145 // Actually, this may be a bug if we try to finalize with trimming and 0146 // there is no leading whitespace. 0147 addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingPostNewline); 0148 addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingText); 0149 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingBeginTemplateSyntax); 0150 } 0151 0152 { 0153 auto handler = new EofHandler(lexer, notFinished); 0154 handler->setTargetState(finished); 0155 notFinished->setEndTransition(handler); 0156 } 0157 0158 if (type == Lexer::SmartTrim) { 0159 { 0160 auto handler = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntaxWhitespace); 0161 handler->setTargetState(finished); 0162 processingPostTemplateSyntaxWhitespace->setEndTransition(handler); 0163 } 0164 { 0165 auto handler = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntax); 0166 handler->setTargetState(finished); 0167 processingPostTemplateSyntax->setEndTransition(handler); 0168 } 0169 } 0170 return machine; 0171 } 0172 0173 Lexer::Lexer(const QString &templateString) 0174 : m_templateString(templateString) 0175 { 0176 } 0177 0178 Lexer::~Lexer() = default; 0179 0180 void Lexer::clearMarkers() 0181 { 0182 m_startSyntaxPosition = -1; 0183 m_endSyntaxPosition = -1; 0184 m_newlinePosition = -1; 0185 } 0186 0187 void Lexer::reset() 0188 { 0189 m_tokenList.clear(); 0190 m_lineCount = 0; 0191 m_upto = 0; 0192 m_processedUpto = 0; 0193 clearMarkers(); 0194 } 0195 0196 QList<Token> Lexer::tokenize(TrimType type) 0197 { 0198 auto machine = createMachine(this, type); 0199 0200 machine->start(); 0201 0202 auto it = m_templateString.constBegin(); 0203 const auto end = m_templateString.constEnd(); 0204 0205 reset(); 0206 for (; it != end; ++it, ++m_upto) 0207 machine->processCharacter(it); 0208 0209 machine->finished(); 0210 0211 machine->stop(); 0212 0213 delete machine; 0214 0215 return m_tokenList; 0216 } 0217 0218 void Lexer::markStartSyntax() 0219 { 0220 m_startSyntaxPosition = m_upto; 0221 } 0222 0223 void Lexer::markEndSyntax() 0224 { 0225 m_endSyntaxPosition = m_upto + 1; 0226 } 0227 0228 void Lexer::markNewline() 0229 { 0230 m_newlinePosition = m_upto; 0231 ++m_lineCount; 0232 } 0233 0234 void Lexer::finalizeToken() 0235 { 0236 auto nextPosition = m_upto; 0237 const auto validSyntax = m_endSyntaxPosition > m_startSyntaxPosition && (m_startSyntaxPosition >= m_processedUpto); 0238 0239 if (validSyntax) { 0240 Q_ASSERT(m_startSyntaxPosition >= 0); 0241 nextPosition = m_startSyntaxPosition - 1; 0242 } 0243 finalizeToken(nextPosition, validSyntax); 0244 } 0245 0246 void Lexer::finalizeTokenWithTrimmedWhitespace() 0247 { 0248 auto nextPosition = m_upto; 0249 // We know this to be true because the state machine has already guaranteed 0250 // it. This method is only called from transition and state actions which 0251 // occur after valid syntax. 0252 // TODO Investigate performance and other implications of changing the state 0253 // machine to assure similar in finalizeToken() 0254 Q_ASSERT(m_endSyntaxPosition > m_startSyntaxPosition); 0255 0256 Q_ASSERT(m_startSyntaxPosition >= 0); 0257 if (m_newlinePosition >= 0 && m_newlinePosition >= m_processedUpto) 0258 nextPosition = qMin(m_startSyntaxPosition - 1, m_newlinePosition); 0259 else 0260 nextPosition = m_startSyntaxPosition - 1; 0261 finalizeToken(nextPosition, true); 0262 } 0263 0264 void Lexer::finalizeToken(int nextPosition, bool processSyntax) 0265 { 0266 { 0267 Token token; 0268 token.content = m_templateString.mid(m_processedUpto, nextPosition - m_processedUpto); 0269 token.tokenType = TextToken; 0270 token.linenumber = m_lineCount; 0271 m_tokenList.append(token); 0272 } 0273 0274 m_processedUpto = nextPosition; 0275 0276 if (!processSyntax) 0277 return; 0278 0279 m_processedUpto = m_endSyntaxPosition; 0280 0281 const auto differentiator = m_templateString.at(m_startSyntaxPosition); 0282 if (differentiator == QLatin1Char('#')) 0283 return; 0284 0285 Token syntaxToken; 0286 syntaxToken.content = m_templateString.mid(m_startSyntaxPosition + 1, m_endSyntaxPosition - m_startSyntaxPosition - 3).trimmed(); 0287 syntaxToken.linenumber = m_lineCount; 0288 0289 if (differentiator == QLatin1Char('{')) { 0290 syntaxToken.tokenType = VariableToken; 0291 } else { 0292 Q_ASSERT(differentiator == QLatin1Char('%')); 0293 syntaxToken.tokenType = BlockToken; 0294 } 0295 m_tokenList.append(syntaxToken); 0296 }