Warning, /kdevelop/kdevelop-pg-qt/kdev-pg/kdev-pg-lexer.ll is written in an unsupported language. File is not indexed.
%{
/* This file is part of kdev-pg-qt
   Copyright (C) 2006 Jakob Petsovits <jpetso@gmx.at>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
*/

#include <QDebug>

#include "kdev-pg.h"
#include "kdev-pg-parser.hh"

#include <QFile>
#include <QFileInfo>
#include <QTextStream>

/* Helpers defined in the user-code section at the end of this file. */
int inp();
void appendLineBuffer();
void newline();
void countNewlines(const char*, int);
void yyerror(const char* );
extern int yyLine;
extern int currentOffset;
extern bool yymoreFlag;

namespace KDevPG
{
  extern QFile file;
  extern QFileInfo fileInfo;
  extern QTextStream checkOut;
}

/* yymore() plus a flag telling appendLineBuffer() that the next yytext
   starts with text that has already been copied into the line buffer. */
#define YYMORE yymoreFlag = true; yymore();

/* Feed the scanner one character at a time from KDevPG::file. */
#define YY_INPUT(buf, result, max_size) \
  { \
    int c = inp(); \
    result = (c == EOF) ? YY_NULL : (buf[0] = (char)c, 1); \
  }

/* Runs before every rule action: keeps the current-line buffer up to date. */
#define YY_USER_ACTION appendLineBuffer();

/* Store a freshly allocated, NUL-terminated copy of string[0..len) in yylval.str. */
#define COPY_TO_YYLVAL(string, len) \
    yylval.str = (char*) calloc(len+1, sizeof(char)); \
    strncpy(yylval.str, string, len); \
    yylval.str[len] = '\0';
/* Like COPY_TO_YYLVAL, but unless the line number policy is BeautifulCode the
   copy is prefixed with a "#line"-style directive naming the grammar file and
   padded with firstCodeColumn blanks before the code itself. */
#define COPY_CODE_TO_YYLVAL(string, len) \
    if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::BeautifulCode) \
    { \
      COPY_TO_YYLVAL(string, len) \
    } \
    else \
    { \
      QByteArray tmp("\n#"); \
      if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::CompatibilityLineNumbers) \
        tmp += "line"; \
      tmp += " " + QString::number(firstCodeLine).toLocal8Bit(); \
      tmp += " \"" + KDevPG::fileInfo.absoluteFilePath().toLocal8Bit() + "\""; \
      if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::FullLineNumbers) \
        tmp += " 1"; \
      tmp += "\n"; \
      size_t memlen = tmp.size() + firstCodeColumn + len + 16 + 1; \
      yylval.str = (char*) calloc(memlen, sizeof(char)); \
      strncpy(yylval.str, tmp.data(), tmp.size()); \
      memset(yylval.str + tmp.size(), ' ', firstCodeColumn); \
      strncpy(yylval.str + tmp.size() + firstCodeColumn, string, len); \
      yylval.str[memlen-1] = '\0'; \
    }

/* Return a one-character T_STRING token. */
#define ESCAPE_CHARACTER(chr) \
    yylval.str = (char*) calloc(2, sizeof(char)); yylval.str[0] = chr; yylval.str[1] = '\0'; return T_STRING;

namespace {
  enum RulePosition {
    RuleBody,
    RuleFooter,
    RuleLexer
  };
  RulePosition rulePosition = RuleBody;
  int openBrackets; // for rule arguments and regexp usage
  int firstCodeLine; // where the current code-block begins
  int firstCodeColumn;
}

#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif

%}

Whitespace  [ \f\t]
Newline     "\r\n"|\r|\n
String      ["]([^\r\n\"]|[\\][^\r\n])*["]
Char        [_a-zA-Z0-9]|\\[xXuU][0-9a-fA-F]{1,6}|\\[oO][0-7][0-7]*|\\[dD][0-9]{1,7}|\\[yY][01]{1,21}|\\[\x20-\x7f]

%x CODE
%x PARSERCLASS
%x RULE_ARGUMENTS
%x RULE_PARAMETERS_HEADER
%x RULE_PARAMETERS_VARNAME
%x RULE_LEXER

%%


{Whitespace}*           /* skip */ ;
{Newline}               newline();
"--"[^\r\n]*            /* line comments, skip */ ;

";"+                    rulePosition = RuleBody; return ';';
"->"                    if(rulePosition == RuleLexer) BEGIN(RULE_LEXER); else rulePosition = RuleFooter; return T_ARROW;
".="                    return T_INLINE;

"("                     return '(';
")"                     return ')';
"{"                     return '{';
"}"                     return '}';
","                     return ',';
"0"                     return '0';
"#"                     return '#';
"."                     return '.';
":"                     return ':';
"="                     return '=';
"+"                     return '+';
"*"                     return '*';
"?"                     return '?';
"@"                     return '@';
"|"                     return '|';
"&"                     return '&';

"try/recover"           return T_TRY_RECOVER;
"try/rollback"          return T_TRY_ROLLBACK;
"catch"                 return T_CATCH;

"%export_macro"         return T_EXPORT_MACRO;
"%export_macro_header"  return T_EXPORT_MACRO_HEADER;
"%token"                return T_TOKEN_DECLARATION;
"%token_stream"         return T_TOKEN_STREAM_DECLARATION;
"%namespace"            return T_NAMESPACE_DECLARATION;
"%parserclass"          BEGIN(PARSERCLASS); return T_PARSERCLASS_DECLARATION;
"%lexerclass"           BEGIN(PARSERCLASS); return T_LEXERCLASS_DECLARATION;
"%input_stream"         return T_INPUT_STREAM;
"%ast_extra_members"    return T_AST_DECLARATION;
"%parser_declaration_header"  return T_PARSER_DECLARATION_HEADER;
"%parser_bits_header"   return T_PARSER_BITS_HEADER;
"%ast_header"           return T_AST_HEADER;
"%lexer_declaration_header"   return T_LEXER_DECLARATION_HEADER;
"%lexer_bits_header"    return T_LEXER_BITS_HEADER;
"%input_encoding"       return T_INPUT_ENCODING;
"%table_lexer"          return T_TABLE_LEXER;
"%sequence_lexer"       return T_SEQUENCE_LEXER;
"%ast_base"             return T_AST_BASE;
"%parser_base"          return T_PARSER_BASE;
"%lexer_base"           return T_LEXER_BASE;
"%bin"                  return T_BIN;
"%pre"                  return T_PRE;
"%post"                 return T_POST;
"%tern"                 return T_TERN;
"%paren"                return T_PAREN;
"%priority"             return T_PRIORITY;
"%<"                    rulePosition = RuleBody; return T_LOPR;
"%>"                    rulePosition = RuleFooter; return T_ROPR;
"%left"                 return T_LEFT_ASSOC;
"%right"                return T_RIGHT_ASSOC;
"%isLeft"               return T_IS_LEFT_ASSOC;
"%isRight"              return T_IS_RIGHT_ASSOC;
"%lexer"                rulePosition = RuleLexer; return T_LEXER;

<PARSERCLASS>{
{Whitespace}*           /* skip */ ;
{Newline}               newline();
"("                     return '(';
"public"                return T_PUBLIC;
"private"               return T_PRIVATE;
"protected"             return T_PROTECTED;
"declaration"           return T_DECLARATION;
"constructor"           return T_CONSTRUCTOR;
"destructor"            return T_DESTRUCTOR;
"bits"                  return T_BITS;
")"                     BEGIN(INITIAL); return ')';
.                       BEGIN(INITIAL); REJECT; /* everything else */
}


"[" {
    if (rulePosition == RuleBody) { /* use the arguments in a rule call */
      firstCodeLine = yyLine;
      openBrackets = 0;
      BEGIN(RULE_ARGUMENTS);
    }
    else if (rulePosition == RuleFooter) { /* declare the arguments */
      BEGIN(RULE_PARAMETERS_HEADER); return '[';
    }
  }

<RULE_LEXER>{
"--"[^\r\n]*            /* line comments, skip */ ;
{Newline}               newline();
"{"[a-zA-Z_][a-zA-Z_0-9]*"}"  ++yytext; COPY_TO_YYLVAL(yytext,yyleng-2); return T_NAMED_REGEXP;
";"+(("--"[^\r\n]*[\r\n])|[ \f\t\r\n])+/";"+  countNewlines(yytext, yyleng); rulePosition = RuleBody; BEGIN(INITIAL); return ';';
";"+                    return ';';
"["                     ++openBrackets; return '[';
"]"                     --openBrackets; return ']';
"("                     ++openBrackets; return '(';
")"                     --openBrackets; return ')';
"?"                     return '?';
"|"                     return '|';
"^"                     return '^';
{Char}"-"{Char}         COPY_TO_YYLVAL(yytext,yyleng); return T_RANGE;
"&"                     return '&';
"~"                     return '~';
"*"                     return '*';
"+"                     return '+';
"@"                     return '@';
"."                     return '.';
"->"                    return T_ARROW;
"%continue"             return T_CONTINUE;
"%fail"                 return T_FAIL;
"%enter"                return T_ENTER_RULE_SET;
"%leave"                return T_LEAVE_RULE_SET;
"%la"                   return T_LOOKAHEAD;
"%ba"                   return T_BARRIER;
"[:"                    firstCodeLine = yyLine; firstCodeColumn = currentOffset + 2; BEGIN(CODE);
[_A-Z]+/[ \f\t\r\n]*";" COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL;
[_a-zA-Z0-9]+/[ \f\t\r\n]*";" COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
{Char}+                 COPY_TO_YYLVAL(yytext,yyleng); return T_UNQUOTED_STRING;
{Whitespace}            /* skip */
{String}                yytext++; COPY_TO_YYLVAL(yytext,yyleng-2); return T_STRING;

<<EOF>> {
    BEGIN(INITIAL); // is not set automatically by yyrestart()
    KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule lexer definition..." << Qt::endl;
    yyerror("");
    return 0;
  }
}

<RULE_ARGUMENTS>{
{Newline}               newline(); YYMORE;
{String}                YYMORE; /* this and... */
["]                     YYMORE; /* ...this prevent brackets inside strings to be counted */
[^\[\]\n\r\"]*          YYMORE; /* gather everything that's not a bracket, and append what comes next */
"["                     openBrackets++; YYMORE;
"]" {
    openBrackets--;
    if (openBrackets < 0) {
      COPY_CODE_TO_YYLVAL(yytext,(yyleng-1)); /* cut off the trailing bracket */
      BEGIN(INITIAL);
      return T_RULE_ARGUMENTS;
    }
  }
<<EOF>> {
    BEGIN(INITIAL); // is not set automatically by yyrestart()
    KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule argument specification..." << Qt::endl;
    yyerror("");
    return 0;
  }
}

<RULE_PARAMETERS_HEADER>{
{Whitespace}*           /* skip */ ;
{Newline}               newline();
"--"[^\r\n]*            /* line comments, skip */ ;
":"{Whitespace}*        BEGIN(RULE_PARAMETERS_VARNAME); return ':';
"#"                     return '#';
"member"                return T_MEMBER;
"temporary"             return T_TEMPORARY;
"argument"              return T_ARGUMENT;
"node"                  return T_NODE;
"token"                 return T_TOKEN;
"variable"              return T_VARIABLE;
";"                     return ';';  /* only used for "token" types */
[_a-zA-Z]*[_a-zA-Z0-9]+ COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
"]"                     BEGIN(INITIAL); return ']';
.                       BEGIN(INITIAL); REJECT; /* everything else */
}

<RULE_PARAMETERS_VARNAME>{
{Newline}               newline(); YYMORE;
[^;\r\n]*               YYMORE; /* gather everything that's not a semicolon, and append what comes next */
";" {
    // Drop the trailing semicolon, then strip trailing blanks (space, form
    // feed, tab). The scan stops at index 0, so yytext is never read before
    // its first character, even when only ";" was matched.
    int length = yyleng-1;
    while (length > 0
           && (yytext[length-1] == ' '
               || yytext[length-1] == '\f'
               || yytext[length-1] == '\t'))
      --length;
    COPY_TO_YYLVAL(yytext,length);
    BEGIN(RULE_PARAMETERS_HEADER);
    return T_IDENTIFIER;
  }
.                       BEGIN(INITIAL); REJECT; /* everything else */
}


"[:"                    firstCodeLine = yyLine; firstCodeColumn = currentOffset + 2; BEGIN(CODE);
<CODE>{
{Newline}               newline(); YYMORE;
[^:\n\r]*               YYMORE; /* gather everything that's not a colon, and append what comes next */
":"+[^:\]\n\r]*         YYMORE; /* also gather colons that are not followed by colons or newlines */
":]" {
    COPY_CODE_TO_YYLVAL(yytext, (yyleng-2)); /* cut off the trailing stuff */
    if(rulePosition == RuleLexer)
      BEGIN(RULE_LEXER);
    else
      BEGIN(INITIAL);
    return T_CODE;
  }
<<EOF>> {
    BEGIN(INITIAL); // is not set automatically by yyrestart()
    KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed code segment..." << Qt::endl;
    yyerror("");
    return 0;
  }
}


[_A-Z]+                 COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL;
[_a-zA-Z][_a-zA-Z0-9]*  COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
[0-9]+                  COPY_TO_YYLVAL(yytext,yyleng); return T_NUMBER;


{String} {
    yytext++;                         /* start inside the quotes */
    COPY_TO_YYLVAL(yytext,yyleng-2);  /* cut off the trailing quote */
    return T_STRING;
  }

. {
    KDevPG::checkOut << "Unexpected character: ``" << yytext[0] << "''" << Qt::endl;
    yyerror("");
  }


%%

/* Last character read by inp(); yyerror() uses it to resume the current line. */
char ch;
int yyLine = 1, currentOffset = 0;
bool endOfLine = false, yymoreFlag = false;
/* Growable buffer holding the text scanned so far on the current line. */
int yyTextLineLeng = 1024;
char *yyTextLine = (char*)malloc(yyTextLineLeng);

/* Read the next byte from KDevPG::file.
   Returns the byte as a non-negative int, or EOF at end of file or on a
   failed read. Going through unsigned char keeps the byte 0xFF distinct
   from EOF on platforms where plain char is signed. */
int inp()
{
  if( KDevPG::file.atEnd() )
    return EOF;
  if( !KDevPG::file.getChar( &ch ) )
    return EOF;
  return static_cast<unsigned char>(ch);
}

/* Note that a line break was scanned; the line buffer is reset on the next token. */
void newline()
{
  ++yyLine;
  endOfLine = true;
}

/* Advance yyLine by the number of '\n' characters in code[0..leng). */
void countNewlines(const char* code, int leng)
{
  for(int i = 0; i != leng; ++i)
    if(code[i] == '\n')
      ++yyLine;
}

/* initialize the line buffer */
void clearLineBuffer()
{
  yyTextLine[0] = '\0';
  currentOffset = 0;
  endOfLine = false;
}

struct InitLineBuffer
{
  InitLineBuffer()
  {
    clearLineBuffer();
  }
} _initLineBuffer;

/* Make sure yyTextLine can hold at least `needed` bytes (terminator included),
   doubling the capacity until it fits. Exits if the allocation fails, because
   the scanner cannot continue without its line buffer. */
static void reserveLineBuffer(size_t needed)
{
  if(needed <= size_t(yyTextLineLeng))
    return;

  int grownLeng = yyTextLineLeng;
  do
    {
      grownLeng *= 2;
    }
  while(needed > size_t(grownLeng));

  char *grown = (char*)realloc(yyTextLine, grownLeng);
  if(grown == nullptr)
    {
      KDevPG::checkOut << "** ERROR Out of memory while buffering the current input line" << Qt::endl;
      exit(EXIT_FAILURE);
    }
  yyTextLine = grown;
  yyTextLineLeng = grownLeng;
}

/* add the current token to the current line */
void appendLineBuffer()
{
  if (endOfLine == true)
    clearLineBuffer();

  static int lastTextLeng = 0;

  currentOffset = strlen(yyTextLine); /* start of current */

  /* after yymore() the first lastTextLeng characters of yytext are already buffered */
  int newLeng = currentOffset + strlen(yytext) - (yymoreFlag ? lastTextLeng : 0) + 1;
  reserveLineBuffer(newLeng);

  strcpy(yyTextLine+currentOffset, yytext + (yymoreFlag ? lastTextLeng : 0)); /* append current */
  /* strcpy is faster than strcat */

  Q_ASSERT(strlen(yyTextLine) < size_t(yyTextLineLeng));

  lastTextLeng = strlen(yytext);
  yymoreFlag = false;
}

/* Report a lexical error: print the position, the offending line and a caret
   under the most recent token, then terminate the program. msg is ignored. */
void yyerror(const char* msg )
{
  Q_UNUSED(msg);
  KDevPG::checkOut << "** LEXICAL ERROR at line " << yyLine << " column " << currentOffset << Qt::endl;

  /* get the rest of the line if we are not already at the end */
  if(!endOfLine)
    {
      size_t used = strlen(yyTextLine);
      int c = static_cast<unsigned char>(ch);

      while(c != EOF && c != '\n')
        {
          reserveLineBuffer(used + 3); /* this character, '\n' and '\0' */
          yyTextLine[used++] = char(c);
          c = inp();
        }

      reserveLineBuffer(used + 2); /* '\n' and '\0' */
      yyTextLine[used++] = '\n';
      yyTextLine[used] = 0;
    }

  /* yyTextLine[] now has the whole line, with the current token */
  /* at currentOffset */

  /* print error message and current line */
  KDevPG::checkOut << yyTextLine;

  /* print a ^ under the most recent token */
  KDevPG::checkOut << QString(currentOffset, ' ').append('^') << Qt::endl; /* currentOffset spaces, then ^ */

  exit(EXIT_FAILURE);
}

/* Only one input file is scanned: tell flex there is nothing more at EOF. */
int yywrap() { return 1; }