Warning, /kdevelop/kdevelop-pg-qt/kdev-pg/kdev-pg-lexer.ll is written in an unsupported language. File is not indexed.

0001 %{
0002 /* This file is part of kdev-pg-qt
0003    Copyright (C) 2006 Jakob Petsovits <jpetso@gmx.at>
0004 
0005    This library is free software; you can redistribute it and/or
0006    modify it under the terms of the GNU Library General Public
0007    License as published by the Free Software Foundation; either
0008    version 2 of the License, or (at your option) any later version.
0009 
0010    This library is distributed in the hope that it will be useful,
0011    but WITHOUT ANY WARRANTY; without even the implied warranty of
0012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0013    Library General Public License for more details.
0014 
0015    You should have received a copy of the GNU Library General Public License
0016    along with this library; see the file COPYING.LIB.  If not, write to
0017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0018    Boston, MA 02110-1301, USA.
0019 */
0020 
0021 #include <QDebug>
0022 
0023 #include "kdev-pg.h"
0024 #include "kdev-pg-parser.hh"
0025 
0026 #include <QFile>
0027 #include <QFileInfo>
0028 #include <QTextStream>
0029 
0030 int inp();
0031 void appendLineBuffer();
0032 void newline();
0033 void countNewlines(const char*, int);
0034 void yyerror(const char* );
0035 extern int yyLine;
0036 extern int currentOffset;
0037 extern bool yymoreFlag;
0038 
0039 namespace KDevPG
0040 {
0041   extern QFile file;
0042   extern QFileInfo fileInfo;
0043   extern QTextStream checkOut;
0044 }
0045 
/* Ask the scanner to append the next match to yytext instead of replacing it,
   and remember that request in yymoreFlag so that appendLineBuffer() copies
   only the newly matched part into the line buffer.
   Wrapped in do/while(0) so the macro expands to exactly one statement and
   stays correct when used as the body of an unbraced if/else. */
#define YYMORE do { yymoreFlag = true; yymore(); } while (0)
0047 
/* Feed the scanner one character per request, fetched through inp().
   max_size is ignored: at most a single character is ever delivered. */
#define YY_INPUT(buf, result, max_size) \
  { \
    const int nextChar = inp(); \
    if (nextChar == EOF) \
      result = YY_NULL; \
    else \
    { \
      buf[0] = (char)nextChar; \
      result = 1; \
    } \
  }

/* Executed before every rule action: mirror the matched text into the line buffer. */
#define YY_USER_ACTION  appendLineBuffer();
0055 
/* Store a calloc()ed, NUL-terminated copy of the first `len` bytes of
   `string` in yylval.str.
   Note that `len` is evaluated more than once, so it must not have side
   effects.  Arguments are parenthesised and the body is wrapped in
   do/while(0) so the macro is a single statement at every call site. */
#define COPY_TO_YYLVAL(string, len) \
    do { \
      yylval.str = (char*) calloc((len) + 1, sizeof(char)); \
      strncpy(yylval.str, (string), (len)); \
      yylval.str[(len)] = '\0'; \
    } while (0)

/* Like COPY_TO_YYLVAL, but for code taken from the grammar file.
   With the BeautifulCode policy the text is copied unchanged.  Otherwise the
   copy is prefixed with a line directive ("#" or "#line", then firstCodeLine
   and the absolute path of the input file, plus " 1" for FullLineNumbers)
   and padded with firstCodeColumn spaces in front of the text. */
#define COPY_CODE_TO_YYLVAL(string, len) \
    do { \
      if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::BeautifulCode) \
      { \
        COPY_TO_YYLVAL(string, len); \
      } \
      else \
      { \
        QByteArray tmp("\n#"); \
        if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::CompatibilityLineNumbers) \
          tmp += "line"; \
        tmp += " " + QString::number(firstCodeLine).toLocal8Bit(); \
        tmp += " \"" + KDevPG::fileInfo.absoluteFilePath().toLocal8Bit() + "\""; \
        if(KDevPG::globalSystem.lineNumberPolicy == KDevPG::World::FullLineNumbers) \
          tmp += " 1"; \
        tmp += "\n"; \
        /* directive + padding + text + 16 spare bytes + terminator */ \
        size_t memlen = tmp.size() + firstCodeColumn + (len) + 16 + 1; \
        yylval.str = (char*) calloc(memlen, sizeof(char)); \
        strncpy(yylval.str, tmp.data(), tmp.size()); \
        memset(yylval.str + tmp.size(), ' ', firstCodeColumn); \
        strncpy(yylval.str + tmp.size() + firstCodeColumn, (string), (len)); \
        yylval.str[memlen-1] = '\0'; \
      } \
    } while (0)
0082 
/* Return a T_STRING token whose value is a calloc()ed one-character string
   holding `chr`.  The macro ends the enclosing action with a return. */
#define ESCAPE_CHARACTER(chr) \
        yylval.str = (char*) calloc(2, sizeof(char)); \
        yylval.str[0] = chr; \
        yylval.str[1] = '\0'; \
        return T_STRING;
0085 
namespace {
  /* Which part of a grammar rule the scanner is currently in. */
  enum RulePosition {
    RuleBody,    /* set after ";" and "%<" */
    RuleFooter,  /* set after "->" (outside lexer rules) and "%>" */
    RuleLexer    /* set after "%lexer" */
  };

  RulePosition rulePosition = RuleBody;

  /* Bracket nesting depth; for rule arguments and regexp usage. */
  int openBrackets = 0;

  /* Line and column where the current code block begins. */
  int firstCodeLine = 0;
  int firstCodeColumn = 0;
}
0097 
0098 #ifdef _WIN32
0099 #include <io.h>
0100 #else
0101 #include <unistd.h>
0102 #endif
0103 
0104 %}
0105 
/* Horizontal blanks: space, form feed or tab. */
Whitespace  [ \f\t]
/* One line break: CR LF, a lone CR, or a lone LF. */
Newline     "\r\n"|\r|\n
/* Double-quoted text on a single line; a backslash may be followed by any character except a line break. */
String      ["]([^\r\n\"]|[\\][^\r\n])*["]
/* One character inside a lexer rule: a letter, digit or underscore, or a backslash escape
   (x/u + 1-6 hex digits, o + octal digits, d + 1-7 decimal digits, y + 1-21 binary digits,
   or a backslash followed by any character from 0x20 to 0x7f). */
Char        [_a-zA-Z0-9]|\\[xXuU][0-9a-fA-F]{1,6}|\\[oO][0-7][0-7]*|\\[dD][0-9]{1,7}|\\[yY][01]{1,21}|\\[\x20-\x7f]

/* Exclusive start conditions:
     CODE                     inside a code block opened by "[:"
     PARSERCLASS              after %parserclass or %lexerclass
     RULE_ARGUMENTS           after "[" while in a rule body
     RULE_PARAMETERS_HEADER   after "[" while in a rule footer
     RULE_PARAMETERS_VARNAME  after ":" inside RULE_PARAMETERS_HEADER
     RULE_LEXER               after "->" while in a %lexer rule set */
%x CODE
%x PARSERCLASS
%x RULE_ARGUMENTS
%x RULE_PARAMETERS_HEADER
%x RULE_PARAMETERS_VARNAME
%x RULE_LEXER
0117 
0118 %%
0119 
0120 
{Whitespace}*           { /* skip blanks */ }
{Newline}               { newline(); }
"--"[^\r\n]*            { /* line comment, skip */ }

";"+                    { rulePosition = RuleBody; return ';'; }
"->"                    {
                          /* in a lexer rule set the arrow switches to the regexp scanner */
                          if (rulePosition == RuleLexer)
                            BEGIN(RULE_LEXER);
                          else
                            rulePosition = RuleFooter;
                          return T_ARROW;
                        }
".="                    { return T_INLINE; }

"("                     { return '('; }
")"                     { return ')'; }
"{"                     { return '{'; }
"}"                     { return '}'; }
","                     { return ','; }
"0"                     { return '0'; }
"#"                     { return '#'; }
"."                     { return '.'; }
":"                     { return ':'; }
"="                     { return '='; }
"+"                     { return '+'; }
"*"                     { return '*'; }
"?"                     { return '?'; }
"@"                     { return '@'; }
"|"                     { return '|'; }
"&"                     { return '&'; }
0145 
"try/recover"           { return T_TRY_RECOVER; }
"try/rollback"          { return T_TRY_ROLLBACK; }
"catch"                 { return T_CATCH; }

"%export_macro"         { return T_EXPORT_MACRO; }
"%export_macro_header"  { return T_EXPORT_MACRO_HEADER; }
"%token"                { return T_TOKEN_DECLARATION; }
"%token_stream"         { return T_TOKEN_STREAM_DECLARATION; }
"%namespace"            { return T_NAMESPACE_DECLARATION; }
"%parserclass"          { BEGIN(PARSERCLASS); return T_PARSERCLASS_DECLARATION; }
"%lexerclass"           { BEGIN(PARSERCLASS); return T_LEXERCLASS_DECLARATION; }
"%input_stream"         { return T_INPUT_STREAM; }
"%ast_extra_members"    { return T_AST_DECLARATION; }
"%parser_declaration_header"   { return T_PARSER_DECLARATION_HEADER; }
"%parser_bits_header"   { return T_PARSER_BITS_HEADER; }
"%ast_header"           { return T_AST_HEADER; }
"%lexer_declaration_header"   { return T_LEXER_DECLARATION_HEADER; }
"%lexer_bits_header"    { return T_LEXER_BITS_HEADER; }
"%input_encoding"       { return T_INPUT_ENCODING; }
"%table_lexer"          { return T_TABLE_LEXER; }
"%sequence_lexer"       { return T_SEQUENCE_LEXER; }
"%ast_base"             { return T_AST_BASE; }
"%parser_base"          { return T_PARSER_BASE; }
"%lexer_base"           { return T_LEXER_BASE; }
"%bin"                  { return T_BIN; }
"%pre"                  { return T_PRE; }
"%post"                 { return T_POST; }
"%tern"                 { return T_TERN; }
"%paren"                { return T_PAREN; }
"%priority"             { return T_PRIORITY; }
"%<"                    { rulePosition = RuleBody; return T_LOPR; }
"%>"                    { rulePosition = RuleFooter; return T_ROPR; }
"%left"                 { return T_LEFT_ASSOC; }
"%right"                { return T_RIGHT_ASSOC; }
"%isLeft"               { return T_IS_LEFT_ASSOC; }
"%isRight"              { return T_IS_RIGHT_ASSOC; }
"%lexer"                { rulePosition = RuleLexer; return T_LEXER; }
0183 
<PARSERCLASS>{
  {Whitespace}*           { /* skip blanks */ }
  {Newline}               { newline(); }
  "("                     { return '('; }
  "public"                { return T_PUBLIC; }
  "private"               { return T_PRIVATE; }
  "protected"             { return T_PROTECTED; }
  "declaration"           { return T_DECLARATION; }
  "constructor"           { return T_CONSTRUCTOR; }
  "destructor"            { return T_DESTRUCTOR; }
  "bits"                  { return T_BITS; }
  ")"                     { BEGIN(INITIAL); return ')'; }
  .                       { BEGIN(INITIAL); REJECT; /* everything else: leave this start condition and reject the match */ }
}
0198 
0199 
"[" {
    /* An opening bracket means different things depending on the rule position:
       in a rule body it starts the arguments of a rule call, in a rule footer
       it starts the parameter declarations.  In any other position the
       bracket is consumed without producing a token. */
    if (rulePosition == RuleBody) { /* use the arguments in a rule call */
      firstCodeLine = yyLine;
      /* The argument text starts right after the bracket.  Without this the
         column used by COPY_CODE_TO_YYLVAL would be a stale value left over
         from an earlier code block. */
      firstCodeColumn = currentOffset + 1;
      openBrackets = 0;
      BEGIN(RULE_ARGUMENTS);
    }
    else if (rulePosition == RuleFooter) { /* declare the arguments */
      BEGIN(RULE_PARAMETERS_HEADER); return '[';
    }
}
0210 
<RULE_LEXER>{
  "--"[^\r\n]*            { /* line comment, skip */ }
  {Newline}               { newline(); }
  "{"[a-zA-Z_][a-zA-Z_0-9]*"}"          {
                            /* named regexp: hand over the name without the surrounding braces */
                            ++yytext;
                            COPY_TO_YYLVAL(yytext,yyleng-2);
                            return T_NAMED_REGEXP;
                          }
  ";"+(("--"[^\r\n]*[\r\n])|[ \f\t\r\n])+/";"+   {
                            /* semicolons, then only blanks or comments, then more semicolons: the rule set ends */
                            countNewlines(yytext, yyleng);
                            rulePosition = RuleBody;
                            BEGIN(INITIAL);
                            return ';';
                          }
  ";"+                    { return ';'; }
  "["                     { ++openBrackets; return '['; }
  "]"                     { --openBrackets; return ']'; }
  "("                     { ++openBrackets; return '('; }
  ")"                     { --openBrackets; return ')'; }
  "?"                     { return '?'; }
  "|"                     { return '|'; }
  "^"                     { return '^'; }
  {Char}"-"{Char}         { COPY_TO_YYLVAL(yytext,yyleng); return T_RANGE; }
  "&"                     { return '&'; }
  "~"                     { return '~'; }
  "*"                     { return '*'; }
  "+"                     { return '+'; }
  "@"                     { return '@'; }
  "."                     { return '.'; }
  "->"                    { return T_ARROW; }
  "%continue"             { return T_CONTINUE; }
  "%fail"                 { return T_FAIL; }
  "%enter"                { return T_ENTER_RULE_SET; }
  "%leave"                { return T_LEAVE_RULE_SET; }
  "%la"                   { return T_LOOKAHEAD; }
  "%ba"                   { return T_BARRIER; }
  "[:"                    {
                            /* remember where the code block starts, then gather it in CODE */
                            firstCodeLine = yyLine;
                            firstCodeColumn = currentOffset + 2;
                            BEGIN(CODE);
                          }
  [_A-Z]+/[ \f\t\r\n]*";" { COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL; }
  [_a-zA-Z0-9]+/[ \f\t\r\n]*";" { COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER; }
  {Char}+                 { COPY_TO_YYLVAL(yytext,yyleng); return T_UNQUOTED_STRING; }
  {Whitespace}            { /* skip */ }
  {String}                {
                            /* hand over the text between the quotes */
                            yytext++;
                            COPY_TO_YYLVAL(yytext,yyleng-2);
                            return T_STRING;
                          }

  <<EOF>> {
    BEGIN(INITIAL); /* is not set automatically by yyrestart() */
    KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule lexer definition..." << Qt::endl;
    yyerror("");
    return 0;
  }
}
0252 
<RULE_ARGUMENTS>{
  {Newline}               newline(); YYMORE;
  {String}                YYMORE; /* this and... */
  ["]                     YYMORE; /* ...this prevent brackets inside strings to be counted */
  [^\[\]\n\r\"]*          YYMORE; /* gather everything that is not a bracket, and append what comes next */
  "["                     openBrackets++; YYMORE;
  "]" {
      /* openBrackets counts the nested brackets opened inside the arguments;
         it only drops below zero for the bracket that closes the arguments. */
      openBrackets--;
      if (openBrackets < 0) {
        COPY_CODE_TO_YYLVAL(yytext,(yyleng-1)); /* cut off the trailing bracket */
        BEGIN(INITIAL);
        return T_RULE_ARGUMENTS;
      }
      /* This bracket closes a nested one: keep gathering.  Without the
         request below the next match would replace yytext and all text
         collected so far would be missing from the returned arguments. */
      YYMORE;
  }
  <<EOF>> {
      BEGIN(INITIAL); /* is not set automatically by yyrestart() */
      KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed rule argument specification..." << Qt::endl;
      yyerror("");
      return 0;
  }
}
0274 
<RULE_PARAMETERS_HEADER>{
  {Whitespace}*           { /* skip blanks */ }
  {Newline}               { newline(); }
  "--"[^\r\n]*            { /* line comment, skip */ }
  ":"{Whitespace}*        { BEGIN(RULE_PARAMETERS_VARNAME); return ':'; }
  "#"                     { return '#'; }
  "member"                { return T_MEMBER; }
  "temporary"             { return T_TEMPORARY; }
  "argument"              { return T_ARGUMENT; }
  "node"                  { return T_NODE; }
  "token"                 { return T_TOKEN; }
  "variable"              { return T_VARIABLE; }
  ";"                     { return ';'; /* only used for token types */ }
  [_a-zA-Z]*[_a-zA-Z0-9]+           { COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER; }
  "]"                     { BEGIN(INITIAL); return ']'; }
  .                       { BEGIN(INITIAL); REJECT; /* everything else */ }
}
0292 
<RULE_PARAMETERS_VARNAME>{
  {Newline}               newline(); YYMORE;
  [^;\r\n]*               YYMORE; /* gather everything that is not a semicolon, and append what comes next */
  ";" {
      /* yytext holds everything gathered since the colon, plus this semicolon.
         Drop the semicolon, then strip trailing blanks (space, form feed, tab)
         by walking back from the end.  The length > 0 guard keeps the walk
         inside yytext when nothing but blanks was gathered. */
      int length = yyleng-1;
      while (length > 0) {
        const char last = yytext[length-1];
        if (last != ' ' && last != '\f' && last != '\t')
          break;
        --length;
      }
      COPY_TO_YYLVAL(yytext,length);
      BEGIN(RULE_PARAMETERS_HEADER);
      return T_IDENTIFIER;
  }
  .                       BEGIN(INITIAL); REJECT; /* everything else */
}
0317 
0318 
"[:"                    {
                          /* remember where the code block starts, then gather it in CODE */
                          firstCodeLine = yyLine;
                          firstCodeColumn = currentOffset + 2;
                          BEGIN(CODE);
                        }
<CODE>{
  {Newline}               { newline(); YYMORE; }
  [^:\n\r]*               { YYMORE; /* gather everything up to the next colon or line break */ }
  ":"+[^:\]\n\r]*         { YYMORE; /* colons plus the text up to the next colon, closing bracket or line break */ }
  ":]" {
      COPY_CODE_TO_YYLVAL(yytext, (yyleng-2)); /* cut off the closing marker */
      /* return to the start condition the code block was opened from */
      BEGIN(rulePosition == RuleLexer ? RULE_LEXER : INITIAL);
      return T_CODE;
  }
  <<EOF>> {
      BEGIN(INITIAL); /* is not set automatically by yyrestart() */
      KDevPG::checkOut << "** ERROR Encountered end of file in an unclosed code segment..." << Qt::endl;
      yyerror("");
      return 0;
  }
}
0339 
0340 
[_A-Z]+                 { COPY_TO_YYLVAL(yytext,yyleng); return T_TERMINAL; }
[_a-zA-Z][_a-zA-Z0-9]*  { COPY_TO_YYLVAL(yytext,yyleng); return T_IDENTIFIER; }
[0-9]+                  { COPY_TO_YYLVAL(yytext,yyleng); return T_NUMBER; }


{String} {
   /* hand over the text between the quotes */
   ++yytext;
   COPY_TO_YYLVAL(yytext,yyleng-2);
   return T_STRING;
}

. {
  /* no other rule matched this character: report it through yyerror() */
  KDevPG::checkOut << "Unexpected character: ``" << yytext[0] << "''" << Qt::endl;
  yyerror("");
}
0356 
0357 
0358 %%
0359 
/* Character most recently read by inp(). */
char ch;

/* Line counter, starting at 1; advanced by newline() and countNewlines(). */
int yyLine = 1;

/* Offset of the current token inside yyTextLine; set by appendLineBuffer(). */
int currentOffset = 0;

/* Set by newline(); makes the next appendLineBuffer() start a fresh line. */
bool endOfLine = false;

/* Set by YYMORE, cleared by appendLineBuffer(). */
bool yymoreFlag = false;

/* Capacity of yyTextLine in bytes, and the buffer holding the text of the current line. */
int yyTextLineLeng = 1024;
char *yyTextLine = static_cast<char*>(malloc(yyTextLineLeng));
0365 
0366 int inp()
0367 {
0368   if( KDevPG::file.atEnd() )
0369     return EOF;
0370   KDevPG::file.getChar( &ch );
0371   return ch;
0372 }
0373 
0374 void newline()
0375 {
0376   ++yyLine;
0377   endOfLine = true;
0378 }
0379 
0380 void countNewlines(const char* code, int leng)
0381 {
0382   for(int i = 0; i != leng; ++i)
0383     if(code[i] == '\n')
0384       ++yyLine;
0385 }
0386 
0387 /* initialize the line buffer */
0388 void clearLineBuffer()
0389 {
0390   yyTextLine[0] = '\0';
0391   currentOffset = 0;
0392   endOfLine = false;
0393 }
0394 
0395 struct InitLineBuffer
0396 {
0397   InitLineBuffer()
0398   {
0399     clearLineBuffer();
0400   }
0401 } _initLineBuffer;
0402 
0403  /* add the current token to the current line */
0404 void appendLineBuffer()
0405 {
0406   if (endOfLine == true)
0407     clearLineBuffer();
0408   
0409   static int lastTextLeng = 0;
0410   
0411   currentOffset = strlen(yyTextLine); /* start of current */
0412   
0413   int newLeng = currentOffset + strlen(yytext) - (yymoreFlag ? lastTextLeng : 0) + 1;
0414   if(newLeng > yyTextLineLeng)
0415   {
0416     do
0417     {
0418       yyTextLineLeng *= 2;
0419     }
0420     while(newLeng > yyTextLineLeng);
0421     yyTextLine = (char*)realloc(yyTextLine, yyTextLineLeng);
0422   }
0423   
0424   strcpy(yyTextLine+currentOffset, yytext + (yymoreFlag ? lastTextLeng : 0)); /* append current */
0425   /* strcpy is faster than strcat */
0426   
0427   Q_ASSERT(strlen(yyTextLine) < size_t(yyTextLineLeng));
0428   
0429   lastTextLeng = strlen(yytext);
0430   yymoreFlag = false;
0431 }
0432 
0433 void yyerror(const char* msg )
0434 {
0435   Q_UNUSED(msg);
0436   KDevPG::checkOut << "** LEXICAL ERROR at line " << yyLine << " column " << currentOffset << Qt::endl;
0437 
0438   char *current_end = yyTextLine + strlen(yyTextLine);
0439   char *p;
0440 
0441   /* get the rest of the line if we are not already at the end */
0442   if(!endOfLine)
0443     {
0444       p = current_end;
0445       int c = ch;
0446 
0447       while(c != EOF && c != '\n')
0448         {
0449           *p++ = c;
0450           c = inp();
0451         }
0452 
0453       *p++ = '\n';
0454       *p = 0;
0455     }
0456 
0457   /* yyTextLine[] now has the whole line, with the current token */
0458   /* at currentOffset */
0459 
0460   /* print error message and current line */
0461   KDevPG::checkOut << yyTextLine;
0462 
0463   /* print a ^ under the most recent token */
0464   KDevPG::checkOut << QString(currentOffset, ' ').append('^') << Qt::endl; /* currentOffset spaces, then ^ */
0465 
0466   exit(EXIT_FAILURE);
0467 }
0468 
/* Called by the scanner at end of input; always returns 1. */
int yywrap()
{
  return 1;
}
0470