File indexing completed on 2024-12-01 03:32:22

0001 /*
0002     SPDX-FileCopyrightText: 2003-2006 Cies Breijs <cies AT kde DOT nl>
0003 
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006 
0007 
0008 #ifndef _TOKENIZER_H_
0009 #define _TOKENIZER_H_
0010 
0011 #include <QChar>
0012 
0013 
0014 #include "token.h"
0015 #include "translator.h"
0016 
0017 
0018 /**
0019  * @short Generates Token objects from a QString using the Translator.
0020  *
0021  * The Tokenizer reads, one-by-one, characters from a QString (unicode text).
0022  * By trying to translate the tokens it tries to find out the type of
0023  * the tokens, since KTurtle code can be in many different languages
0024  * the programming commands are not known on forehand.
0025  *
0026  * @author Cies Breijs
0027  */
0028 class Tokenizer
0029 {
0030     public:
0031         /**
0032          * @short Constructor. Initialses a Tokenizer.
0033          * Does nothing special. @see initialize().
0034          */
0035         Tokenizer() {}
0036 
0037         /** @short Destructor. Does nothing special. */
0038         ~Tokenizer() {}
0039 
0040         /**
0041          * @short Initializes (resets) the Tokenizer
0042          * Use this method to reset the Tokenizer.
0043          * @param inStream the QString that the Tokenizer will tokenize
0044          */
0045         void initialize(const QString& inStream);
0046 
0047         /**
0048          * Reads a bunch of characters of the input stream and tries to
0049          * recognize them as a certain token type, and returns a Token of that type.
0050          * If nothing is recognized a Token of the type Unknown is returned.
0051          * The singleton Translator object is used to determine the type.
0052          * @returns a pointer to a newly created token as read from the input stream
0053          */
0054         Token* getToken();
0055 
0056 
0057     private:
0058         QChar getChar();    // gets a the next QChar and sets the row and col accordingly
0059         void  ungetChar();  // undoes a getChar() call
0060         static bool isWordChar(const QChar& c);  // convenience functions
0061         static bool isBreak(const QChar& c);
0062         static bool isSpace(const QChar& c);
0063         static bool isTab(const QChar& c);
0064 
0065         Translator *translator;
0066         QString     inputString;
0067 
0068         int at, row, col, prevCol;
0069 
0070         bool atEnd;  // true if the QString is fully tokenized
0071 };
0072 
0073 
0074 #endif  // _TOKENIZER_H_