Warning, /kdevelop/kdevelop-pg-qt/kdev-pg/kdev-pg-lexer.ll is written in an unsupported language. File is not indexed.
0001 %{
0002 /* This file is part of kdev-pg-qt
0003 Copyright (C) 2006 Jakob Petsovits <jpetso@gmx.at>
0004
0005 This library is free software; you can redistribute it and/or
0006 modify it under the terms of the GNU Library General Public
0007 License as published by the Free Software Foundation; either
0008 version 2 of the License, or (at your option) any later version.
0009
0010 This library is distributed in the hope that it will be useful,
0011 but WITHOUT ANY WARRANTY; without even the implied warranty of
0012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0013 Library General Public License for more details.
0014
0015 You should have received a copy of the GNU Library General Public License
0016 along with this library; see the file COPYING.LIB. If not, write to
0017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0018 Boston, MA 02110-1301, USA.
0019 */
0020
0021 #include <QDebug>
0022
0023 #include "kdev-pg.h"
0024 #include "kdev-pg-parser.hh"
0025
0026 #include <QFile>
0027 #include <QFileInfo>
0028 #include <QTextStream>
0029
/* Scanner helpers and state that are defined in the user-code section at the end of this file. */
0030 int inp();
0031 void appendLineBuffer();
0032 void newline();
0033 void countNewlines(const char*, int);
0034 void yyerror(const char* );
0035 extern int yyLine;
0036 extern int currentOffset;
0037 extern bool yymoreFlag;
0038
/* Objects defined outside this file. In this file, `file` is read by inp(), `fileInfo` supplies the */
/* absolute path used by COPY_CODE_TO_YYLVAL, and `checkOut` receives the diagnostics. */
0039 namespace KDevPG
0040 {
0041 extern QFile file;
0042 extern QFileInfo fileInfo;
0043 extern QTextStream checkOut;
0044 }
0045
/* Calls yymore() and records that in yymoreFlag, which appendLineBuffer() consults to copy only the */
/* newly matched part of yytext. Expands to two statements, so wrap it in braces under if/else. */
0046 #define YYMORE yymoreFlag = true; yymore();
0047
/* Input hook: hands the scanner exactly one character per request, taken from inp(). */
0048 #define YY_INPUT(buf, result, max_size) \
0049 { \
0050 int c = inp(); \
0051 result = (c == EOF) ? YY_NULL : (buf[0] = (char)c, 1); \
0052 }
0053
/* Executed ahead of each rule action: mirrors the matched text into the line buffer. */
0054 #define YY_USER_ACTION appendLineBuffer();
0055
/* Stores a zero-terminated copy of the first `len` characters of `string` in yylval.str (calloc'ed, */
/* len+1 bytes). Expands to several statements and does not parenthesise its arguments. */
0056 #define COPY_TO_YYLVAL(string, len) \
0057 yylval.str = (char*) calloc(len+1, sizeof(char)); \
0058 strncpy(yylval.str, string, len); \
0059 yylval.str[len] = '\0';
/* Like COPY_TO_YYLVAL, but unless the line number policy is BeautifulCode the copy is prefixed with */
/* a newline, a "#" / "#line" directive naming firstCodeLine and the absolute grammar file path */
/* (plus " 1" for FullLineNumbers), and firstCodeColumn spaces of padding. */
/* NOTE(review): memlen includes 16 spare bytes whose purpose is not visible here. */
0060 #define COPY_CODE_TO_YYLVAL(string, len) \
0061 if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::BeautifulCode) \
0062 { \
0063 COPY_TO_YYLVAL(string, len) \
0064 } \
0065 else \
0066 { \
0067 QByteArray tmp("\n#"); \
0068 if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::CompatibilityLineNumbers) \
0069 tmp += "line"; \
0070 tmp += " " + QString::number(firstCodeLine).toLocal8Bit(); \
0071 tmp += " \"" + KDevPG::fileInfo.absoluteFilePath().toLocal8Bit() + "\""; \
0072 if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::FullLineNumbers) \
0073 tmp += " 1"; \
0074 tmp += "\n"; \
0075 size_t memlen = tmp.size() + firstCodeColumn + len + 16 + 1; \
0076 yylval.str = (char*) calloc(memlen, sizeof(char)); \
0077 strncpy(yylval.str, tmp.data(), tmp.size()); \
0078 memset(yylval.str + tmp.size(), ' ', firstCodeColumn); \
0079 strncpy(yylval.str + tmp.size() + firstCodeColumn, string, len); \
0080 yylval.str[memlen-1] = '\0'; \
0081 }
0082
/* Stores the single character `chr` as a one-character string in yylval.str and returns T_STRING. */
0083 #define ESCAPE_CHARACTER(chr) \
0084 yylval.str = (char*) calloc(2, sizeof(char)); yylval.str[0] = chr; yylval.str[1] = '\0'; return T_STRING;
0085
/* File-local scanner state. */
0086 namespace {
/* Where the scanner currently is inside a grammar rule; the "[" and "->" rules branch on it. */
0087 enum RulePosition {
0088 RuleBody, /* set after ";" and "%<" */
0089 RuleFooter, /* set after "->" (outside lexer rules) and "%>" */
0090 RuleLexer /* set after "%lexer" */
0091 };
0092 RulePosition rulePosition = RuleBody;
0093 int openBrackets; // for rule arguments and regexp usage
0094 int firstCodeLine; // where the current code-block begins
0095 int firstCodeColumn; /* number of padding spaces COPY_CODE_TO_YYLVAL puts before the copied code */
0096 }
0097
0098 #ifdef _WIN32
0099 #include <io.h>
0100 #else
0101 #include <unistd.h>
0102 #endif
0103
0104 %}
0105
/* Named patterns used by the rules, followed by the exclusive (%x) start conditions. */
/* Newline accepts CR LF, a lone CR and a lone LF. String is a double-quoted literal on one line */
/* in which a backslash may precede any character except a line break. Char is one alphanumeric or */
/* underscore, or a backslash followed by: x/X/u/U and 1-6 hex digits, o/O and octal digits, */
/* d/D and 1-7 decimal digits, y/Y and 1-21 binary digits, or any character in 0x20-0x7f. */
0106 Whitespace [ \f\t]
0107 Newline "\r\n"|\r|\n
0108 String ["]([^\r\n\"]|[\\][^\r\n])*["]
0109 Char [_a-zA-Z0-9]|\\[xXuU][0-9a-fA-F]{1,6}|\\[oO][0-7][0-7]*|\\[dD][0-9]{1,7}|\\[yY][01]{1,21}|\\[\x20-\x7f]
0110
0111 %x CODE
0112 %x PARSERCLASS
0113 %x RULE_ARGUMENTS
0114 %x RULE_PARAMETERS_HEADER
0115 %x RULE_PARAMETERS_VARNAME
0116 %x RULE_LEXER
0117
0118 %%
0119
0120
  /* INITIAL state: whitespace and "--" line comments yield no token; newlines only update the line count. */
0121 {Whitespace}* /* skip */ ;
0122 {Newline} newline();
0123 "--"[^\r\n]* /* line comments, skip */ ;
0124
  /* Rule separators. They also maintain rulePosition, which decides how a later "[" is lexed; */
  /* inside a %lexer rule "->" switches to the RULE_LEXER state instead. */
0125 ";"+ rulePosition = RuleBody; return ';';
0126 "->" if(rulePosition == RuleLexer) BEGIN(RULE_LEXER); else rulePosition = RuleFooter; return T_ARROW;
0127 ".=" return T_INLINE;
0128
  /* Single-character tokens are returned as their own character code. A lone "0" is listed here, */
  /* ahead of the generic number rule further down, so it wins the tie for the input "0". */
0129 "(" return '(';
0130 ")" return ')';
0131 "{" return '{';
0132 "}" return '}';
0133 "," return ',';
0134 "0" return '0';
0135 "#" return '#';
0136 "." return '.';
0137 ":" return ':';
0138 "=" return '=';
0139 "+" return '+';
0140 "*" return '*';
0141 "?" return '?';
0142 "@" return '@';
0143 "|" return '|';
0144 "&" return '&';
0145
0146 "try/recover" return T_TRY_RECOVER;
0147 "try/rollback" return T_TRY_ROLLBACK;
0148 "catch" return T_CATCH;
0149
  /* Percent directives. %parserclass and %lexerclass both continue in the PARSERCLASS state. */
0150 "%export_macro" return T_EXPORT_MACRO;
0151 "%export_macro_header" return T_EXPORT_MACRO_HEADER;
0152 "%token" return T_TOKEN_DECLARATION;
0153 "%token_stream" return T_TOKEN_STREAM_DECLARATION;
0154 "%namespace" return T_NAMESPACE_DECLARATION;
0155 "%parserclass" BEGIN(PARSERCLASS); return T_PARSERCLASS_DECLARATION;
0156 "%lexerclass" BEGIN(PARSERCLASS); return T_LEXERCLASS_DECLARATION;
0157 "%input_stream" return T_INPUT_STREAM;
0158 "%ast_extra_members" return T_AST_DECLARATION;
0159 "%parser_declaration_header" return T_PARSER_DECLARATION_HEADER;
0160 "%parser_bits_header" return T_PARSER_BITS_HEADER;
0161 "%ast_header" return T_AST_HEADER;
0162 "%lexer_declaration_header" return T_LEXER_DECLARATION_HEADER;
0163 "%lexer_bits_header" return T_LEXER_BITS_HEADER;
0164 "%input_encoding" return T_INPUT_ENCODING;
0165 "%table_lexer" return T_TABLE_LEXER;
0166 "%sequence_lexer" return T_SEQUENCE_LEXER;
0167 "%ast_base" return T_AST_BASE;
0168 "%parser_base" return T_PARSER_BASE;
0169 "%lexer_base" return T_LEXER_BASE;
0170 "%bin" return T_BIN;
0171 "%pre" return T_PRE;
0172 "%post" return T_POST;
0173 "%tern" return T_TERN;
0174 "%paren" return T_PAREN;
0175 "%priority" return T_PRIORITY;
0176 "%<" rulePosition = RuleBody; return T_LOPR;
0177 "%>" rulePosition = RuleFooter; return T_ROPR;
0178 "%left" return T_LEFT_ASSOC;
0179 "%right" return T_RIGHT_ASSOC;
0180 "%isLeft" return T_IS_LEFT_ASSOC;
0181 "%isRight" return T_IS_RIGHT_ASSOC;
0182 "%lexer" rulePosition = RuleLexer; return T_LEXER;
0183
  /* PARSERCLASS state (entered after %parserclass / %lexerclass): lexes the parenthesised specifier. */
  /* ")" and any unexpected character switch back to INITIAL. */
  /* NOTE(review): the catch-all relies on REJECT after BEGIN(INITIAL); confirm the character is */
  /* really re-examined under the INITIAL rules. */
0184 <PARSERCLASS>{
0185 {Whitespace}* /* skip */ ;
0186 {Newline} newline();
0187 "(" return '(';
0188 "public" return T_PUBLIC;
0189 "private" return T_PRIVATE;
0190 "protected" return T_PROTECTED;
0191 "declaration" return T_DECLARATION;
0192 "constructor" return T_CONSTRUCTOR;
0193 "destructor" return T_DESTRUCTOR;
0194 "bits" return T_BITS;
0195 ")" BEGIN(INITIAL); return ')';
0196 . BEGIN(INITIAL); REJECT; /* everything else */
0197 }
0198
0199
"["                     {
    /* A '[' opens a different construct depending on the position inside the rule. */
    if (rulePosition == RuleBody) { /* use the arguments in a rule call */
      /* Remember where the argument text begins: COPY_CODE_TO_YYLVAL uses both values for the
         line directive and the column padding. currentOffset is the offset of this '[' in the
         line buffer, so the arguments start one character later. Leaving the column unset would
         reuse the column of whichever code block was lexed before. */
      firstCodeLine = yyLine;
      firstCodeColumn = currentOffset + 1;
      openBrackets = 0;
      BEGIN(RULE_ARGUMENTS);
    }
    else if (rulePosition == RuleFooter) { /* declare the arguments */
      BEGIN(RULE_PARAMETERS_HEADER); return '[';
    }
    /* rulePosition == RuleLexer: the bracket is consumed without producing a token */
}
0210
  /* RULE_LEXER state: the regular-expression body of a lexer rule. It is entered from INITIAL on "->" */
  /* while rulePosition is RuleLexer, and resumed after an embedded code block. */
0211 <RULE_LEXER>{
0212 "--"[^\r\n]* /* line comments, skip */ ;
0213 {Newline} newline();
  /* {name}: reference to a named regexp; the braces are dropped from the stored text */
0214 "{"[a-zA-Z_][a-zA-Z_0-9]*"}" ++yytext; COPY_TO_YYLVAL(yytext,yyleng-2); return T_NAMED_REGEXP;
  /* A run of ';' followed by whitespace/comments and, as trailing context, another run of ';' ends the */
  /* lexer rules and returns to INITIAL. The match can span lines, hence countNewlines(). */
0215 ";"+(("--"[^\r\n]*[\r\n])|[ \f\t\r\n])+/";"+ countNewlines(yytext, yyleng); rulePosition = RuleBody; BEGIN(INITIAL); return ';';
0216 ";"+ return ';';
0217 "[" ++openBrackets; return '[';
0218 "]" --openBrackets; return ']';
0219 "(" ++openBrackets; return '(';
0220 ")" --openBrackets; return ')';
0221 "?" return '?';
0222 "|" return '|';
0223 "^" return '^';
0224 {Char}"-"{Char} COPY_TO_YYLVAL(yytext,yyleng); return T_RANGE;
0225 "&" return '&';
0226 "~" return '~';
0227 "*" return '*';
0228 "+" return '+';
0229 "@" return '@';
0230 "." return '.';
0231 "->" return T_ARROW;
0232 "%continue" return T_CONTINUE;
0233 "%fail" return T_FAIL;
0234 "%enter" return T_ENTER_RULE_SET;
0235 "%leave" return T_LEAVE_RULE_SET;
0236 "%la" return T_LOOKAHEAD;
0237 "%ba" return T_BARRIER;
  /* "[:" opens an embedded code block; the code text starts two characters after the token start */
0238 "[:" firstCodeLine = yyLine; firstCodeColumn = currentOffset + 2; BEGIN(CODE);
  /* A name directly followed (after optional whitespace) by ';' is a token or identifier, not regexp text */
0239 [_A-Z]+/[ \f\t\r\n]*";" COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL;
0240 [_a-zA-Z0-9]+/[ \f\t\r\n]*";" COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
0241 {Char}+ COPY_TO_YYLVAL(yytext,yyleng); return T_UNQUOTED_STRING;
0242 {Whitespace} /* skip */
  /* quoted string: stored without the surrounding quotes */
0243 {String} yytext++; COPY_TO_YYLVAL(yytext,yyleng-2); return T_STRING;
0244
0245 <<EOF>> {
0246 BEGIN(INITIAL); // is not set automatically by yyrestart()
0247 KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule lexer definition..." << Qt::endl;
0248 yyerror("");
0249 return 0;
0250 }
0251 }
0252
0253 <RULE_ARGUMENTS>{
0254 {Newline} newline(); YYMORE;
0255 {String} YYMORE; /* this and... */
0256 ["] YYMORE; /* ...this prevent brackets inside strings to be counted */
0257 [^\[\]\n\r\"]* YYMORE; /* gather everything that's not a bracket, and append what comes next */
0258 "[" openBrackets++; YYMORE;
0259 "]" {
0260 openBrackets--;
0261 if (openBrackets < 0) {
0262 COPY_CODE_TO_YYLVAL(yytext,(yyleng-1)); /* cut off the trailing bracket */
0263 BEGIN(INITIAL);
0264 return T_RULE_ARGUMENTS;
0265 }
0266 }
0267 <<EOF>> {
0268 BEGIN(INITIAL); // is not set automatically by yyrestart()
0269 KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule argument specification..." << Qt::endl;
0270 yyerror("");
0271 return 0;
0272 }
0273 }
0274
  /* RULE_PARAMETERS_HEADER state (entered on "[" while rulePosition is RuleFooter): storage and kind */
  /* keywords plus a name. ":" hands over to RULE_PARAMETERS_VARNAME, and "]" or any unexpected */
  /* character returns to INITIAL. */
0275 <RULE_PARAMETERS_HEADER>{
0276 {Whitespace}* /* skip */ ;
0277 {Newline} newline();
0278 "--"[^\r\n]* /* line comments, skip */ ;
0279 ":"{Whitespace}* BEGIN(RULE_PARAMETERS_VARNAME); return ':';
0280 "#" return '#';
0281 "member" return T_MEMBER;
0282 "temporary" return T_TEMPORARY;
0283 "argument" return T_ARGUMENT;
0284 "node" return T_NODE;
0285 "token" return T_TOKEN;
0286 "variable" return T_VARIABLE;
0287 ";" return ';'; /* only used for "token" types */
  /* The keywords above are listed first, so they win over this rule for an equally long match */
0288 [_a-zA-Z]*[_a-zA-Z0-9]+ COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
0289 "]" BEGIN(INITIAL); return ']';
0290 . BEGIN(INITIAL); REJECT; /* everything else */
0291 }
0292
0293 <RULE_PARAMETERS_VARNAME>{
0294 {Newline} newline(); YYMORE;
0295 [^;\r\n]* YYMORE; /* gather everything that's not a semicolon, and append what comes next */
0296 ";" {
0297 // strip trailing whitespace
0298 int length = yyleng-1; // and first, the trailing semicolon
0299 for (int i = length-1; i < 1; i--) {
0300 switch(yytext[i-1])
0301 {
0302 case ' ':
0303 case '\f':
0304 case '\t':
0305 continue;
0306 default:
0307 length = i;
0308 break;
0309 }
0310 }
0311 COPY_TO_YYLVAL(yytext,length);
0312 BEGIN(RULE_PARAMETERS_HEADER);
0313 return T_IDENTIFIER;
0314 }
0315 . BEGIN(INITIAL); REJECT; /* everything else */
0316 }
0317
0318
  /* "[:" in INITIAL opens an embedded code block: remember its line and the column two characters */
  /* past the token start, then gather the text in the CODE state. */
0319 "[:" firstCodeLine = yyLine; firstCodeColumn = currentOffset + 2; BEGIN(CODE);
  /* CODE state: every piece is appended with yymore() until ":]" closes the block, which yields T_CODE */
  /* and resumes RULE_LEXER or INITIAL depending on rulePosition. */
  /* NOTE(review): two or more colons directly before "]" (e.g. "::]") are taken by the colon rule, */
  /* so such input does not seem to close the block; confirm whether that is intended. */
0320 <CODE>{
0321 {Newline} newline(); YYMORE;
0322 [^:\n\r]* YYMORE; /* gather everything that's not a colon, and append what comes next */
0323 ":"+[^:\]\n\r]* YYMORE; /* also gather colons that are not followed by colons or newlines */
0324 ":]" {
0325 COPY_CODE_TO_YYLVAL(yytext, (yyleng-2)); /* cut off the trailing stuff */
0326 if(rulePosition == RuleLexer)
0327 BEGIN(RULE_LEXER);
0328 else
0329 BEGIN(INITIAL);
0330 return T_CODE;
0331 }
0332 <<EOF>> {
0333 BEGIN(INITIAL); // is not set automatically by yyrestart()
0334 KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed code segment..." << Qt::endl;
0335 yyerror("");
0336 return 0;
0337 }
0338 }
0339
0340
  /* Generic INITIAL tokens. Names made only of capitals and underscores are terminals; for such */
  /* input both name rules match the same length and the first one listed wins. */
0341 [_A-Z]+ COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL;
0342 [_a-zA-Z][_a-zA-Z0-9]* COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER;
0343 [0-9]+ COPY_TO_YYLVAL(yytext,yyleng); return T_NUMBER;
0344
0345
0346 {String} {
0347 yytext++; /* start inside the quotes */
0348 COPY_TO_YYLVAL(yytext,yyleng-2); /* cut off the trailing quote */
0349 return T_STRING;
0350 }
0351
  /* Anything not matched above is a lexical error; yyerror() reports it and exits. */
0352 . {
0353 KDevPG::checkOut << "Unexpected character: ``" << yytext[0] << "''" << Qt::endl;
0354 yyerror("");
0355 }
0356
0357
0358 %%
0359
/* Scanner state shared by the helper functions below. */
0360 char ch; /* character most recently fetched by inp() */
0361 int yyLine = 1, currentOffset = 0; /* current line number; offset of the current token in yyTextLine */
0362 bool endOfLine = false, yymoreFlag = false; /* set by newline(); set by the YYMORE macro */
0363 int yyTextLineLeng = 1024; /* allocated size of yyTextLine in bytes */
0364 char *yyTextLine = (char*)malloc(yyTextLineLeng); /* text matched so far on the current line */
0365
0366 int inp()
0367 {
0368 if( KDevPG::file.atEnd() )
0369 return EOF;
0370 KDevPG::file.getChar( &ch );
0371 return ch;
0372 }
0373
0374 void newline()
0375 {
0376 ++yyLine;
0377 endOfLine = true;
0378 }
0379
0380 void countNewlines(const char* code, int leng)
0381 {
0382 for(int i = 0; i != leng; ++i)
0383 if(code[i] == '\n')
0384 ++yyLine;
0385 }
0386
0387 /* initialize the line buffer */
0388 void clearLineBuffer()
0389 {
0390 yyTextLine[0] = '\0';
0391 currentOffset = 0;
0392 endOfLine = false;
0393 }
0394
0395 struct InitLineBuffer
0396 {
0397 InitLineBuffer()
0398 {
0399 clearLineBuffer();
0400 }
0401 } _initLineBuffer;
0402
0403 /* add the current token to the current line */
0404 void appendLineBuffer()
0405 {
0406 if (endOfLine == true)
0407 clearLineBuffer();
0408
0409 static int lastTextLeng = 0;
0410
0411 currentOffset = strlen(yyTextLine); /* start of current */
0412
0413 int newLeng = currentOffset + strlen(yytext) - (yymoreFlag ? lastTextLeng : 0) + 1;
0414 if(newLeng > yyTextLineLeng)
0415 {
0416 do
0417 {
0418 yyTextLineLeng *= 2;
0419 }
0420 while(newLeng > yyTextLineLeng);
0421 yyTextLine = (char*)realloc(yyTextLine, yyTextLineLeng);
0422 }
0423
0424 strcpy(yyTextLine+currentOffset, yytext + (yymoreFlag ? lastTextLeng : 0)); /* append current */
0425 /* strcpy is faster than strcat */
0426
0427 Q_ASSERT(strlen(yyTextLine) < size_t(yyTextLineLeng));
0428
0429 lastTextLeng = strlen(yytext);
0430 yymoreFlag = false;
0431 }
0432
0433 void yyerror(const char* msg )
0434 {
0435 Q_UNUSED(msg);
0436 KDevPG::checkOut << "** LEXICAL ERROR at line " << yyLine << " column " << currentOffset << Qt::endl;
0437
0438 char *current_end = yyTextLine + strlen(yyTextLine);
0439 char *p;
0440
0441 /* get the rest of the line if we are not already at the end */
0442 if(!endOfLine)
0443 {
0444 p = current_end;
0445 int c = ch;
0446
0447 while(c != EOF && c != '\n')
0448 {
0449 *p++ = c;
0450 c = inp();
0451 }
0452
0453 *p++ = '\n';
0454 *p = 0;
0455 }
0456
0457 /* yyTextLine[] now has the whole line, with the current token */
0458 /* at currentOffset */
0459
0460 /* print error message and current line */
0461 KDevPG::checkOut << yyTextLine;
0462
0463 /* print a ^ under the most recent token */
0464 KDevPG::checkOut << QString(currentOffset, ' ').append('^') << Qt::endl; /* currentOffset spaces, then ^ */
0465
0466 exit(EXIT_FAILURE);
0467 }
0468
/* End-of-input hook: returning 1 tells the scanner that no further input follows. */
int yywrap()
{
  return 1;
}
0470