File indexing completed on 2024-05-12 04:39:43
0001 /* 0002 SPDX-FileCopyrightText: 2004 Roberto Raggi <roberto@kdevelop.org> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "milexer.h" 0008 #include "tokens.h" 0009 #include <cctype> 0010 #include <iostream> 0011 0012 using namespace KDevMI::MI; 0013 0014 bool MILexer::s_initialized = false; 0015 scan_fun_ptr MILexer::s_scan_table[]; 0016 0017 0018 MILexer::MILexer() 0019 { 0020 if (!s_initialized) 0021 setupScanTable(); 0022 } 0023 0024 MILexer::~MILexer() 0025 { 0026 } 0027 0028 void MILexer::setupScanTable() 0029 { 0030 s_initialized = true; 0031 0032 for (int i=0; i<128; ++i) { 0033 switch (i) { 0034 case '\n': 0035 s_scan_table[i] = &MILexer::scanNewline; 0036 break; 0037 0038 case '"': 0039 s_scan_table[i] = &MILexer::scanStringLiteral; 0040 break; 0041 0042 default: 0043 if (isspace(i)) 0044 s_scan_table[i] = &MILexer::scanWhiteSpaces; 0045 else if (isalpha(i) || i == '_') 0046 s_scan_table[i] = &MILexer::scanIdentifier; 0047 else if (isdigit(i)) 0048 s_scan_table[i] = &MILexer::scanNumberLiteral; 0049 else 0050 s_scan_table[i] = &MILexer::scanChar; 0051 } 0052 } 0053 0054 s_scan_table[128] = &MILexer::scanUnicodeChar; 0055 } 0056 0057 /* 0058 0059 m_firstToken = m_tokens.data(); 0060 m_currentToken = 0; 0061 0062 m_firstToken = m_tokens.data(); 0063 m_currentToken = m_firstToken; 0064 */ 0065 0066 TokenStream *MILexer::tokenize(const FileSymbol *fileSymbol) 0067 { 0068 m_tokensCount = 0; 0069 m_tokens.resize(64); 0070 0071 m_contents = fileSymbol->contents; 0072 m_length = m_contents.length(); 0073 m_ptr = 0; 0074 0075 m_lines.resize(8); 0076 m_line = 0; 0077 0078 m_lines[m_line++] = 0; 0079 0080 m_cursor = 0; 0081 0082 // tokenize 0083 int pos, len; 0084 0085 for (;;) { 0086 if (m_tokensCount == (int)m_tokens.size()) 0087 m_tokens.resize(m_tokensCount * 2); 0088 0089 Token &tk = m_tokens[m_tokensCount++]; 0090 tk.kind = nextToken(pos, len); 0091 tk.position = pos; 0092 tk.length = len; 0093 0094 if (tk.kind == 0) 0095 break; 0096 } 0097 0098 auto *tokenStream = new TokenStream; 0099 tokenStream->m_contents = m_contents; 0100 0101 tokenStream->m_lines = m_lines; 0102 tokenStream->m_line = m_line; 0103 0104 tokenStream->m_tokens = m_tokens; 0105 tokenStream->m_tokensCount = m_tokensCount; 0106 0107 tokenStream->m_firstToken = tokenStream->m_tokens.data(); 0108 tokenStream->m_currentToken = tokenStream->m_firstToken; 0109 0110 tokenStream->m_cursor = m_cursor; 0111 0112 return tokenStream; 0113 } 0114 0115 int MILexer::nextToken(int &pos, int &len) 0116 { 0117 while (m_ptr < m_length) { 0118 const int start = m_ptr; 0119 0120 const char ch = m_contents[m_ptr]; 0121 Q_ASSERT(ch >= 0); 0122 int kind = 0; 0123 (this->*s_scan_table[static_cast<uchar>(ch)])(&kind); 0124 0125 switch (kind) { 0126 case Token_whitespaces: 0127 case '\n': 0128 break; 0129 0130 default: 0131 pos = start; 0132 len = m_ptr - start; 0133 return kind; 0134 } 0135 } 0136 0137 return 0; 0138 } 0139 0140 void MILexer::scanChar(int *kind) 0141 { 0142 *kind = m_contents[m_ptr++]; 0143 } 0144 0145 void MILexer::scanWhiteSpaces(int *kind) 0146 { 0147 *kind = Token_whitespaces; 0148 0149 while (m_ptr < m_length) { 0150 char ch = m_contents[m_ptr]; 0151 if (!(isspace(ch) && ch != '\n')) 0152 break; 0153 0154 ++m_ptr; 0155 } 0156 } 0157 0158 void MILexer::scanNewline(int *kind) 0159 { 0160 if (m_line == (int)m_lines.size()) 0161 m_lines.resize(m_lines.size() * 2); 0162 0163 if (m_lines.at(m_line) < m_ptr) 0164 m_lines[m_line++] = m_ptr; 0165 0166 *kind = m_contents[m_ptr++]; 0167 } 0168 0169 void MILexer::scanUnicodeChar(int *kind) 0170 { 0171 *kind = m_contents[m_ptr++]; 0172 } 0173 0174 void MILexer::scanStringLiteral(int *kind) 0175 { 0176 ++m_ptr; 0177 while (char c = m_contents[m_ptr]) { 0178 switch (c) { 0179 case '\n': 0180 // ### error 0181 *kind = Token_string_literal; 0182 return; 0183 case '\\': 0184 { 0185 char next = m_contents.at(m_ptr+1); 0186 if (next == '"' || next == '\\') 0187 m_ptr += 2; 0188 else 0189 ++m_ptr; 0190 } 0191 break; 0192 case '"': 0193 ++m_ptr; 0194 *kind = Token_string_literal; 0195 return; 0196 default: 0197 ++m_ptr; 0198 break; 0199 } 0200 } 0201 0202 // ### error 0203 *kind = Token_string_literal; 0204 } 0205 0206 void MILexer::scanIdentifier(int *kind) 0207 { 0208 while (m_ptr < m_length) { 0209 const char ch = m_contents[m_ptr]; 0210 if (!(isalnum(ch) || ch == '-' || ch == '_')) 0211 break; 0212 0213 ++m_ptr; 0214 } 0215 0216 *kind = Token_identifier; 0217 } 0218 0219 void MILexer::scanNumberLiteral(int *kind) 0220 { 0221 while (m_ptr < m_length) { 0222 const char ch = m_contents[m_ptr]; 0223 if (!(isalnum(ch) || ch == '.')) 0224 break; 0225 0226 ++m_ptr; 0227 } 0228 0229 // ### finish to implement me!! 0230 *kind = Token_number_literal; 0231 } 0232 0233 void TokenStream::positionAt(int position, int *line, int *column) const 0234 { 0235 if (!(line && column && !m_lines.isEmpty())) 0236 return; 0237 0238 int first = 0; 0239 int len = m_line; 0240 0241 while (len > 0) { 0242 const int half = len >> 1; 0243 const int middle = first + half; 0244 0245 if (m_lines[middle] < position) { 0246 first = middle; 0247 ++first; 0248 len = len - half - 1; 0249 } 0250 else 0251 len = half; 0252 } 0253 0254 *line = qMax(first - 1, 0); 0255 *column = position - m_lines.at(*line); 0256 0257 Q_ASSERT( *column >= 0 ); 0258 } 0259 0260 QByteArray TokenStream::tokenText(int index) const 0261 { 0262 Token *t = index < 0 ? m_currentToken : m_firstToken + index; 0263 const char* data = m_contents.constData(); 0264 return QByteArray(data + t->position, t->length); 0265 } 0266