examples/fact/factlexer.cpp

0001 /*
0002  * Copyright 2005, 2006 Jakob Petsovits <jpetso@gmx.at>
0003  * Based on QMake Parser Copyright 2006 Andreas Pakulat <apaku@gmx.de>
0004  *
0005  * This program is free software; you can redistribute it and/or
0006  * modify it under the terms of the GNU General Public License
0007  * as published by the Free Software Foundation; either version 2
0008  * of the License, or (at your option) any later version.
0009  *
0010  * This program is distributed in the hope that it will be useful,
0011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0013  * GNU General Public License for more details.
0014  *
0015  * You should have received a copy of the GNU General Public License
0016  * along with this program; if not, write to the Free Software
0017  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
0018  * 02110-1301, USA.
0019  */
0020
0021 #include "factlexer.h"
0022
0023 #include <QString>
0024 #include <QStringList>
0025 #include <QDebug>
0026 #include "factparser.h"
0027 #include <kdev-pg-location-table.h>
0028 #include <kdev-pg-token-stream.h>
0029
0030 using namespace fact;
0031
0032 Lexer::Lexer( Parser* _parser, const QString& content ):
0033         m_content( content ), m_parser( _parser ),
0034         m_curpos( 0 ), m_contentSize( m_content.size() ),
0035         m_tokenBegin( 0 ), m_tokenEnd( 0 )
0036 {
0037     pushState( ErrorState );
0038     pushState( DefaultState );
0039 }
0040
0041 int Lexer::state() const
0042 {
0043     return mState.top();
0044 }
0045
0046 void Lexer::pushState( int state )
0047 {
0048     mState.push( state );
0049 }
0050
0051 void Lexer::popState()
0052 {
0053     mState.pop();
0054 }
0055
0056 int Lexer::nextTokenKind()
0057 {
0058     int token = Parser::Token_INVALID;
0059     if ( m_curpos >= m_contentSize )
0060     {
0061         return 0;
0062     }
0063     QChar* it = m_content.data();
0064     it += m_curpos;
0065
0066     // Ignore whitespace
0067     while ( m_curpos < m_contentSize && ( it->isSpace() ) )
0068     {
0069         if (it->unicode() == '\n') {
0070             createNewline(m_curpos);
0071         }
0072         ++it;
0073         ++m_curpos;
0074     }
0075
0076     switch ( state() )
0077     {
0078         case DefaultState:
0079             m_tokenBegin = m_curpos;
0080             if ( m_curpos < m_contentSize )
0081             {
0082                 if ( it->isLetter() )
0083                 {
0084                     QString identifier;
0085                     while ( m_curpos < m_contentSize && ( it->isLetter() || it->isDigit() ) && !it->isSpace() ) {
0086                         identifier.append(*it);
0087                         ++it;
0088                         ++m_curpos;
0089                     }
0090                     m_curpos--;
0091                     QChar* it1 = m_content.data();
0092                     it1 += m_curpos;
0093
0094                     if ( identifier == "if" ) {
0095                         token = Parser::Token_IF;
0096                     } else if ( identifier == "else" ) {
0097                         token = Parser::Token_ELSE;
0098                     } else if ( identifier == "var" ) {
0099                         token = Parser::Token_VAR;
0100                     } else if ( identifier == "function" ) {
0101                         token = Parser::Token_FUNCTION;
0102                     } else if ( identifier == "return" ) {
0103                         token = Parser::Token_RETURN;
0104                     } else {
0105                         token = Parser::Token_IDENTIFIER;
0106                     }
0107                 }
0108                 else if ( it->isDigit() )
0109                 {
0110                     token = Parser::Token_NUMBER;
0111                     while ( m_curpos < m_contentSize && ( it->isDigit() ) ) {
0112                         ++it;
0113                         ++m_curpos;
0114                     }
0115                     --m_curpos;
0116                 }
0117                 else
0118                 {
0119                     switch ( it->unicode() )
0120                     {
0121                         case ',':
0122                             token = Parser::Token_COMMA;
0123                             break;
0124                         case ';':
0125                             token = Parser::Token_SEMICOLON;
0126                             break;
0127                         case '(':
0128                             token = Parser::Token_LPAREN;
0129                             break;
0130                         case ')':
0131                             token = Parser::Token_RPAREN;
0132                             break;
0133                         case '{':
0134                             token = Parser::Token_LBRACE;
0135                             break;
0136                         case '}':
0137                             token = Parser::Token_RBRACE;
0138                             break;
0139                         case '=':
0140                         {
0141                             QChar* c2 = m_curpos < m_contentSize ? it + 1 : 0 ;
0142                             if ( c2 && c2->unicode() == '=' )
0143                             {
0144                                 m_curpos++;
0145                                 token = Parser::Token_EQUAL;
0146                             } else {
0147                                 token = Parser::Token_ASSIGN;
0148                             }
0149                             break;
0150                         }
0151                         case '*':
0152                             token = Parser::Token_STAR;
0153                             break;
0154                         case '-':
0155                             token = Parser::Token_MINUS;
0156                             break;
0157                         default:
0158                             break;
0159                     }
0160                 }
0161             }
0162             break;
0163         default:
0164             token = Parser::Token_INVALID;
0165             break;
0166     }
0167     if ( m_curpos >= m_contentSize )
0168     {
0169         return 0;
0170     }
0171     m_tokenEnd = m_curpos;
0172     m_curpos++;
0173     return token;
0174 }
0175
0176 qint64 Lexer::tokenBegin() const
0177 {
0178   return m_tokenBegin;
0179 }
0180
0181 qint64 Lexer::tokenEnd() const
0182 {
0183   return m_tokenEnd;
0184 }
0185
0186 void Lexer::createNewline( int pos )
0187 {
0188     if( m_parser )
0189         m_parser->tokenStream->locationTable()->newline( pos );
0190 }
0191 // kate: space-indent on; indent-width 4; tab-width 4; replace-tabs on