File indexing completed on 2024-05-12 04:39:43

0001 /*
0002     SPDX-FileCopyrightText: 2004 Roberto Raggi <roberto@kdevelop.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "milexer.h"
0008 #include "tokens.h"
0009 #include <cctype>
0010 #include <iostream>
0011 
0012 using namespace KDevMI::MI;
0013 
// Set to true by setupScanTable(); the constructor only builds the table
// while this is still false.
bool MILexer::s_initialized = false;
// Table of scanner member functions filled in by setupScanTable(): slots
// 0..127 are assigned per character code and slot 128 holds scanUnicodeChar.
scan_fun_ptr MILexer::s_scan_table[];
0016 
0017 
0018 MILexer::MILexer()
0019 {
0020     if (!s_initialized)
0021         setupScanTable();
0022 }
0023 
0024 MILexer::~MILexer()
0025 {
0026 }
0027 
0028 void MILexer::setupScanTable()
0029 {
0030     s_initialized = true;
0031 
0032     for (int i=0; i<128; ++i) {
0033         switch (i) {
0034         case '\n':
0035             s_scan_table[i] = &MILexer::scanNewline;
0036             break;
0037 
0038         case '"':
0039             s_scan_table[i] = &MILexer::scanStringLiteral;
0040             break;
0041 
0042         default:
0043             if (isspace(i))
0044                 s_scan_table[i] = &MILexer::scanWhiteSpaces;
0045             else if (isalpha(i) || i == '_')
0046                 s_scan_table[i] = &MILexer::scanIdentifier;
0047             else if (isdigit(i))
0048                 s_scan_table[i] = &MILexer::scanNumberLiteral;
0049             else
0050                 s_scan_table[i] = &MILexer::scanChar;
0051         }
0052     }
0053 
0054     s_scan_table[128] = &MILexer::scanUnicodeChar;
0055 }
0056 
0057 /*
0058 
0059     m_firstToken = m_tokens.data();
0060     m_currentToken = 0;
0061 
0062     m_firstToken = m_tokens.data();
0063     m_currentToken = m_firstToken;
0064  */
0065 
0066 TokenStream *MILexer::tokenize(const FileSymbol *fileSymbol)
0067 {
0068     m_tokensCount = 0;
0069     m_tokens.resize(64);
0070 
0071     m_contents = fileSymbol->contents;
0072     m_length = m_contents.length();
0073     m_ptr = 0;
0074 
0075     m_lines.resize(8);
0076     m_line = 0;
0077 
0078     m_lines[m_line++] = 0;
0079 
0080     m_cursor = 0;
0081 
0082     // tokenize
0083     int pos, len;
0084 
0085     for (;;) {
0086         if (m_tokensCount == (int)m_tokens.size())
0087             m_tokens.resize(m_tokensCount * 2);
0088 
0089         Token &tk = m_tokens[m_tokensCount++];
0090         tk.kind = nextToken(pos, len);
0091         tk.position = pos;
0092         tk.length = len;
0093 
0094         if (tk.kind == 0)
0095             break;
0096     }
0097 
0098     auto *tokenStream = new TokenStream;
0099     tokenStream->m_contents = m_contents;
0100 
0101     tokenStream->m_lines = m_lines;
0102     tokenStream->m_line = m_line;
0103 
0104     tokenStream->m_tokens = m_tokens;
0105     tokenStream->m_tokensCount = m_tokensCount;
0106 
0107     tokenStream->m_firstToken = tokenStream->m_tokens.data();
0108     tokenStream->m_currentToken = tokenStream->m_firstToken;
0109 
0110     tokenStream->m_cursor = m_cursor;
0111 
0112     return tokenStream;
0113 }
0114 
0115 int MILexer::nextToken(int &pos, int &len)
0116 {
0117     while (m_ptr < m_length) {
0118         const int start = m_ptr;
0119 
0120         const char ch = m_contents[m_ptr];
0121         Q_ASSERT(ch >= 0);
0122         int kind = 0;
0123         (this->*s_scan_table[static_cast<uchar>(ch)])(&kind);
0124 
0125         switch (kind) {
0126             case Token_whitespaces:
0127             case '\n':
0128                 break;
0129 
0130             default:
0131                 pos = start;
0132                 len = m_ptr - start;
0133                 return kind;
0134         }
0135     }
0136 
0137     return 0;
0138 }
0139 
0140 void MILexer::scanChar(int *kind)
0141 {
0142     *kind = m_contents[m_ptr++];
0143 }
0144 
0145 void MILexer::scanWhiteSpaces(int *kind)
0146 {
0147     *kind = Token_whitespaces;
0148 
0149     while (m_ptr < m_length) {
0150         char ch = m_contents[m_ptr];
0151         if (!(isspace(ch) && ch != '\n'))
0152             break;
0153 
0154         ++m_ptr;
0155     }
0156 }
0157 
0158 void MILexer::scanNewline(int *kind)
0159 {
0160     if (m_line == (int)m_lines.size())
0161         m_lines.resize(m_lines.size() * 2);
0162 
0163     if (m_lines.at(m_line) < m_ptr)
0164         m_lines[m_line++] = m_ptr;
0165 
0166     *kind = m_contents[m_ptr++];
0167 }
0168 
0169 void MILexer::scanUnicodeChar(int *kind)
0170 {
0171     *kind = m_contents[m_ptr++];
0172 }
0173 
0174 void MILexer::scanStringLiteral(int *kind)
0175 {
0176     ++m_ptr;
0177     while (char c = m_contents[m_ptr]) {
0178         switch (c) {
0179         case '\n':
0180             // ### error
0181             *kind = Token_string_literal;
0182             return;
0183         case '\\':
0184             {
0185                 char next = m_contents.at(m_ptr+1);
0186                 if (next == '"' || next == '\\')
0187                     m_ptr += 2;
0188                 else
0189                     ++m_ptr;
0190             }
0191             break;
0192         case '"':
0193             ++m_ptr;
0194             *kind = Token_string_literal;
0195             return;
0196         default:
0197             ++m_ptr;
0198             break;
0199         }
0200     }
0201 
0202     // ### error
0203     *kind = Token_string_literal;
0204 }
0205 
0206 void MILexer::scanIdentifier(int *kind)
0207 {
0208     while (m_ptr < m_length) {
0209         const char ch = m_contents[m_ptr];
0210         if (!(isalnum(ch) || ch == '-' || ch == '_'))
0211             break;
0212 
0213         ++m_ptr;
0214     }
0215 
0216     *kind = Token_identifier;
0217 }
0218 
0219 void MILexer::scanNumberLiteral(int *kind)
0220 {
0221     while (m_ptr < m_length) {
0222         const char ch = m_contents[m_ptr];
0223         if (!(isalnum(ch) || ch == '.'))
0224             break;
0225 
0226         ++m_ptr;
0227     }
0228 
0229     // ### finish to implement me!!
0230     *kind = Token_number_literal;
0231 }
0232 
0233 void TokenStream::positionAt(int position, int *line, int *column) const
0234 {
0235     if (!(line && column && !m_lines.isEmpty()))
0236         return;
0237 
0238     int first = 0;
0239     int len = m_line;
0240 
0241     while (len > 0) {
0242         const int half = len >> 1;
0243         const int middle = first + half;
0244 
0245         if (m_lines[middle] < position) {
0246             first = middle;
0247             ++first;
0248             len = len - half - 1;
0249         }
0250         else
0251             len = half;
0252     }
0253 
0254     *line = qMax(first - 1, 0);
0255     *column = position - m_lines.at(*line);
0256 
0257     Q_ASSERT( *column >= 0 );
0258 }
0259 
0260 QByteArray TokenStream::tokenText(int index) const
0261 {
0262     Token *t = index < 0 ? m_currentToken : m_firstToken + index;
0263     const char* data = m_contents.constData();
0264     return QByteArray(data + t->position, t->length);
0265 }
0266