File indexing completed on 2024-04-28 11:39:42

0001 /*
0002  * tokenizer.h - Copyright 2005 Maksim Orlovich <maksim@kde.org>
0003  *
0004  * Redistribution and use in source and binary forms, with or without
0005  * modification, are permitted provided that the following conditions
0006  * are met:
0007  *
0008  * 1. Redistributions of source code must retain the above copyright
0009  *    notice, this list of conditions and the following disclaimer.
0010  * 2. Redistributions in binary form must reproduce the above copyright
0011  *    notice, this list of conditions and the following disclaimer in the
0012  *    documentation and/or other materials provided with the distribution.
0013  *
0014  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
0015  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
0016  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
0017  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
0018  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
0019  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0020  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0021  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0022  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
0023  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0024  */
0025 #ifndef TOKENIZER_H
0026 #define TOKENIZER_H
0027 
0028 #include <QSet>
0029 #include <QString>
0030 #include <QHash>
0031 
0032 #include "step.h"
0033 #include "path.h"
0034 #include "predicate.h"
0035 #include "expression.h"
0036 #include "util.h"
0037 #include "parser.h"
0038 
0039 namespace khtml
0040 {
0041 namespace XPath
0042 {
0043 
0044 struct Token {
0045     int     type;
0046     QString value;
0047     int     intValue; //0 if not set
0048     bool    hasString;
0049 
0050     Token(int _type): type(_type), intValue(0), hasString(false) {}
0051     Token(QString _value): type(ERROR + 1), value(_value), intValue(0), hasString(true) {}
0052     Token(int _type, QString _value): type(_type), value(_value), intValue(0), hasString(true) {}
0053     Token(int _type, int     _value): type(_type), intValue(_value), hasString(false) {}
0054 };
0055 
0056 class Tokenizer
0057 {
0058 private:
0059     int m_nextPos;
0060     QString m_data;
0061     int m_lastTokenType;
0062 
0063     static QHash<QString, Step::AxisType> *s_axisNamesDict;
0064     static QSet<QString> *s_nodeTypeNamesDict;
0065 
0066     enum XMLCat {
0067         NameStart,
0068         NameCont,
0069         NotPartOfName
0070     };
0071 
0072     XMLCat charCat(QChar aChar);
0073 
0074     bool isAxisName(QString name, Step::AxisType *type = nullptr);
0075     bool isNodeTypeName(QString name);
0076     bool isOperatorContext();
0077 
0078     void  skipWS();
0079     Token makeTokenAndAdvance(int code, int advance = 1);
0080     Token makeIntTokenAndAdvance(int code, int val, int advance = 1);
0081     char  peekAheadHelper();
0082     char  peekCurHelper();
0083 
0084     Token lexString();
0085     Token lexNumber();
0086     Token lexNCName();
0087     Token lexQName();
0088 
0089     Token nextTokenInternal();
0090     Tokenizer();
0091     Tokenizer(const Tokenizer &rhs);                  // disabled
0092     Tokenizer &operator=(const Tokenizer &rhs);       // disabled
0093     ~Tokenizer();
0094 public:
0095     static Tokenizer &self();
0096 
0097     void reset(QString);
0098     Token nextToken();
0099 };
0100 
// Interface to the parser
//
// NOTE(review): the khtmlxpathyy* names follow the prefixed yylex/yyerror
// convention of yacc/bison-generated parsers; presumably these are the hooks
// the generated XPath parser calls -- confirm against the grammar file.

// Lexer entry point; returns an int (presumably the code of the next token).
int khtmlxpathyylex();
// Error hook taking a C-string message.
void khtmlxpathyyerror(const char *str);
// Prepares the tokenizer for the given expression string (presumably by
// resetting the Tokenizer instance -- confirm in the implementation).
void initTokenizer(const DOM::DOMString &string);
0105 
0106 } // namespace XPath
0107 
0108 } // namespace khtml
0109 
0110 #endif