File indexing completed on 2024-12-01 03:32:22
0001 /* 0002 SPDX-FileCopyrightText: 2003-2006 Cies Breijs <cies AT kde DOT nl> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 0008 #ifndef _TOKENIZER_H_ 0009 #define _TOKENIZER_H_ 0010 0011 #include <QChar> 0012 0013 0014 #include "token.h" 0015 #include "translator.h" 0016 0017 0018 /** 0019 * @short Generates Token objects from a QString using the Translator. 0020 * 0021 * The Tokenizer reads, one-by-one, characters from a QString (unicode text). 0022 * By trying to translate the tokens it tries to find out the type of 0023 * the tokens, since KTurtle code can be in many different languages 0024 * the programming commands are not known on forehand. 0025 * 0026 * @author Cies Breijs 0027 */ 0028 class Tokenizer 0029 { 0030 public: 0031 /** 0032 * @short Constructor. Initialses a Tokenizer. 0033 * Does nothing special. @see initialize(). 0034 */ 0035 Tokenizer() {} 0036 0037 /** @short Destructor. Does nothing special. */ 0038 ~Tokenizer() {} 0039 0040 /** 0041 * @short Initializes (resets) the Tokenizer 0042 * Use this method to reset the Tokenizer. 0043 * @param inStream the QString that the Tokenizer will tokenize 0044 */ 0045 void initialize(const QString& inStream); 0046 0047 /** 0048 * Reads a bunch of characters of the input stream and tries to 0049 * recognize them as a certain token type, and returns a Token of that type. 0050 * If nothing is recognized a Token of the type Unknown is returned. 0051 * The singleton Translator object is used to determine the type. 0052 * @returns a pointer to a newly created token as read from the input stream 0053 */ 0054 Token* getToken(); 0055 0056 0057 private: 0058 QChar getChar(); // gets a the next QChar and sets the row and col accordingly 0059 void ungetChar(); // undoes a getChar() call 0060 static bool isWordChar(const QChar& c); // convenience functions 0061 static bool isBreak(const QChar& c); 0062 static bool isSpace(const QChar& c); 0063 static bool isTab(const QChar& c); 0064 0065 Translator *translator; 0066 QString inputString; 0067 0068 int at, row, col, prevCol; 0069 0070 bool atEnd; // true if the QString is fully tokenized 0071 }; 0072 0073 0074 #endif // _TOKENIZER_H_