Warning, /kdevelop/kdevelop-pg-qt/examples/cc/cc.ll is written in an unsupported language. File is not indexed.
%{
/*-- Copyright (C) 2009 Jonathan Schmidt-Dominé <devel@the-user.org>
-- Derived from the KDevelop-Java-Lexer
--
-- This library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Library General Public
-- License as published by the Free Software Foundation; either
-- version 2 of the License, or (at your option) any later version.
--
-- This library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-- Library General Public License for more details.
--
-- You should have received a copy of the GNU Library General Public License
-- along with this library; see the file COPYING.LIB.  If not, write to
-- the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-- Boston, MA 02111-1307, USA.*/
%}

%option c++
%option yyclass="cc::Lexer"
%option noyywrap


%{

#define DONT_INCLUDE_FLEXLEXER
#include "lexer.h"
#include <QDebug>

%}


/* UTF-8 sequences, generated with the Unicode.hs script from
 * http://lists.gnu.org/archive/html/help-flex/2005-01/msg00043.html
 *
 * The scanner works on raw bytes, so every Unicode range below is spelled
 * out as the byte sequences that encode it in UTF-8. */

/* \u0024, \u0041-\u005a, \u005f, \u0061-\u007a: one byte in UTF-8 */
Letter1         [A-Za-z_$]
/* \u00c0-\u00d6, \u00d8-\u00f6, \u00f8-\u00ff */
Letter2         [\xC3]([\x80-\x96]|[\x98-\xB6]|[\xB8-\xBF])
/* \u0100-\u1fff */
Letter3         [\xC4-\xDF][\x80-\xBF]|([\xE0][\xA0-\xBF]|[\xE1][\x80-\xBF])[\x80-\xBF]
/* \u3040-\u318f */
Letter4         [\xE3]([\x86][\x80-\x8F]|[\x81-\x85][\x80-\xBF])
/* \u3300-\u337f */
Letter5         [\xE3][\x8C-\x8D][\x80-\xBF]
/* \u3400-\u3d2d */
Letter6         [\xE3](\xB4[\x80-\xAD]|[\x90-\xB3][\x80-\xBF])
/* \u4e00-\u9fff */
Letter7         ([\xE4][\xB8-\xBF]|[\xE5-\xE9][\x80-\xBF])[\x80-\xBF]
/* \uf900-\ufaff */
Letter8         [\xEF][\xA4-\xAB][\x80-\xBF]

Letter          {Letter1}|{Letter2}|{Letter3}|{Letter4}|{Letter5}|{Letter6}|{Letter7}|{Letter8}

/* \u0030-\u0039: ISO-LATIN-1 digits */
Digit1          [0-9]
/* \u0660-\u0669, \u06f0-\u06f9: Arabic-Indic and extended Ar.-Indic digits */
Digit2          [\xD9][\xA0-\xA9]|[\xDB][\xB0-\xB9]
/* \u0966-\u096f, \u09e6-\u09ef: Devanagari digits */
Digit3          [\xE0]([\xA5]|[\xA7])[\xA6-\xAF]
/* \u0a66-\u0a6f, \u0ae6-\u0aef */
Digit4          [\xE0]([\xA9]|[\xAB])[\xA6-\xAF]
/* \u0b66-\u0b6f, \u0be7-\u0bef */
Digit5          [\xE0]([\xAD][\xA6-\xAF]|[\xAF][\xA7-\xAF])
/* \u0c66-\u0c6f, \u0ce6-\u0cef, \u0d66-\u0d6f */
Digit6          [\xE0]([\xB1]|[\xB3]|[\xB5])[\xA6-\xAF]
/* \u0e50-\u0e59, \u0ed0-\u0ed9 */
Digit7          [\xE0]([\xB9]|[\xBB])[\x90-\x99]
/* \u1040-\u1049 */
Digit8          [\xE1][\x81][\x80-\x89]
/* \uff10-\uff19: Fullwidth digits */
Digit9          [\xEF][\xBC][\x90-\x99]

/* \u0080-\uffff: two- and three-byte sequences */
Multibyte1      ([\xC2-\xDF]|[\xE0][\xA0-\xBF]|[\xE1-\xEF][\x80-\xBF])[\x80-\xBF]
/* \u10000-\u1fffff: four-byte sequences */
Multibyte2      ([\xF0][\x90-\xBF]|[\xF1-\xF7][\x80-\xBF])[\x80-\xBF][\x80-\xBF]
/* \u200000-\u3ffffff: five-byte sequences */
Multibyte3      ([\xF8][\x88-\xBF]|[\xF9-\xFB][\x80-\xBF])[\x80-\xBF][\x80-\xBF][\x80-\xBF]
/* \u4000000-\u7fffffff: six-byte sequences, i.e. the lead byte followed by
 * FIVE continuation bytes (one restricted, four unrestricted) */
Multibyte4      ([\xFC][\x84-\xBF]|[\xFD][\x80-\xBF])[\x80-\xBF][\x80-\xBF][\x80-\xBF][\x80-\xBF]
/* Any multi-byte Unicode character. Single-byte ones are just . in lex. */
Multibyte       {Multibyte1}|{Multibyte2}|{Multibyte3}|{Multibyte4}


/* non-Unicode stuff */

HexDigit        [0-9a-fA-F]
Digit           {Digit1}|{Digit2}|{Digit3}|{Digit4}|{Digit5}|{Digit6}|{Digit7}|{Digit8}|{Digit9}
OctalDigit      [0-7]
NonZeroDigit    [1-9]

UnicodeEscape   [\\][u]+{HexDigit}{HexDigit}{HexDigit}{HexDigit}
/* a backslash followed by one to three OCTAL digits; 8, 9 and non-ASCII
 * digits must not be swallowed into the escape */
OctalEscape     [\\]{OctalDigit}({OctalDigit}({OctalDigit})?)?
SimpleEscape    [\\]([']|["]|[\\]|[rnbft])
Escape          {SimpleEscape}|{UnicodeEscape}|{OctalEscape}

IntSuffix       [Ll]
DecimalNum      ([0]|{NonZeroDigit}{Digit}*){IntSuffix}?
OctalNum        [0]{OctalDigit}+{IntSuffix}?
HexNum          [0][xX]{HexDigit}+{IntSuffix}?
IntegerLiteral  {DecimalNum}|{OctalNum}|{HexNum}

Sign            [+-]
FloatSuffix     [fF]|[dD]
SignedInt       {Sign}?{Digit}+
DecimalExponent [eE]{SignedInt}?
BinaryExponent  [pP]{SignedInt}?
Float1          {Digit}+[\.]{Digit}*{DecimalExponent}?{FloatSuffix}?
Float2          [\.]{Digit}+{DecimalExponent}?{FloatSuffix}?
Float3          {Digit}+{DecimalExponent}{FloatSuffix}?
Float4          {Digit}+{DecimalExponent}?{FloatSuffix}
HexFloatNum     [0][xX]{HexDigit}*[\.]{HexDigit}+
HexFloat1       {HexNum}[\.]?{BinaryExponent}{FloatSuffix}?
HexFloat2       {HexFloatNum}{BinaryExponent}{FloatSuffix}?
FloatingPoint   {Float1}|{Float2}|{Float3}|{Float4}|{HexFloat1}|{HexFloat2}

%x IN_BLOCKCOMMENT

%%

 /* whitespace, newlines, preprocessor-statements and comments */

[ \f\t]+        /* skip */ ;
[\n]            /* skip */ ;

"//"[^\n]*      /* line comments, skip */ ;
"#"[^\n]*       /* preprocessor statement, skip */ ;

 /* IN_BLOCKCOMMENT is an exclusive start condition: while it is active only
  * the rules inside the scope below can match. */
"/*"            BEGIN(IN_BLOCKCOMMENT);
<IN_BLOCKCOMMENT>{
[^*\n]*         /* eat anything that's not a '*' */ ;
"*"+[^*/\n]*    /* eat up '*'s that are not followed by slashes or newlines */;
[\n]            /* skip */ ;
"*"+"/"         BEGIN(INITIAL);
<<EOF>> {
    qWarning() << "Encountered end of file in an unclosed block comment";
    return Parser::Token_EOF;
}
}


 /* separators */

"("             return Parser::Token_LPAREN;
")"             return Parser::Token_RPAREN;
"{"             return Parser::Token_LBRACE;
"}"             return Parser::Token_RBRACE;
"["             return Parser::Token_LBRACKET;
"]"             return Parser::Token_RBRACKET;
","             return Parser::Token_COMMA;
";"             return Parser::Token_SEMICOLON;
"."             return Parser::Token_DOT;
"->"            return Parser::Token_ARROW;
":"             return Parser::Token_COLON;


 /* operators */

"?"             return Parser::Token_QUESTION;
"!"             return Parser::Token_NOT;
"~"             return Parser::Token_TILDE;
"=="            return Parser::Token_EQUAL_EQUAL;
"<"             return Parser::Token_LESS;
"<="            return Parser::Token_LESS_EQUAL;
">"             return Parser::Token_GREATER;
">="            return Parser::Token_GREATER_EQUAL;
"!="            return Parser::Token_NOT_EQUAL;
"&&"            return Parser::Token_AND_AND;
"||"            return Parser::Token_OR_OR;
"++"            return Parser::Token_PLUS_PLUS;
"--"            return Parser::Token_MINUS_MINUS;
"="             return Parser::Token_EQUAL;
"+"             return Parser::Token_PLUS;
"+="            return Parser::Token_PLUS_EQUAL;
"-"             return Parser::Token_MINUS;
"-="            return Parser::Token_MINUS_EQUAL;
"*"             return Parser::Token_STAR;
"*="            return Parser::Token_STAR_EQUAL;
"/"             return Parser::Token_DIVIDE;
"/="            return Parser::Token_DIVIDE_EQUAL;
"&"             return Parser::Token_AND;
"&="            return Parser::Token_AND_EQUAL;
"|"             return Parser::Token_OR;
"|="            return Parser::Token_OR_EQUAL;
"^"             return Parser::Token_XOR;
"^="            return Parser::Token_XOR_EQUAL;
"%"             return Parser::Token_REMAINDER;
"%="            return Parser::Token_REMAINDER_EQUAL;
"<<"            return Parser::Token_LSHIFT;
"<<="           return Parser::Token_LSHIFT_EQUAL;
">>"            return Parser::Token_RSHIFT;
">>="           return Parser::Token_RSHIFT_EQUAL;
"..."           return Parser::Token_ELLIPSIS;

 /* keywords */

"break"         return Parser::Token_BREAK;
"case"          return Parser::Token_CASE;
"continue"      return Parser::Token_CONTINUE;
"default"       return Parser::Token_DEFAULT;
"do"            return Parser::Token_DO;
"else"          return Parser::Token_ELSE;
"enum"          return Parser::Token_ENUM;
"for"           return Parser::Token_FOR;
"goto"          return Parser::Token_GOTO;
"if"            return Parser::Token_IF;
"return"        return Parser::Token_RETURN;
"switch"        return Parser::Token_SWITCH;
"while"         return Parser::Token_WHILE;
"static"        return Parser::Token_STATIC;
"volatile"      return Parser::Token_VOLATILE;
"__volatile__"  return Parser::Token_VOLATILE;
"const"         return Parser::Token_CONST;
"typedef"       return Parser::Token_TYPEDEF;
"extern"        return Parser::Token_EXTERN;
"auto"          return Parser::Token_AUTO;
"register"      return Parser::Token_REGISTER;
"void"          return Parser::Token_VOID;
"int"           return Parser::Token_INT;
"char"          return Parser::Token_CHAR;
"short"         return Parser::Token_SHORT;
"long"          return Parser::Token_LONG;
"signed"        return Parser::Token_SIGNED;
"unsigned"      return Parser::Token_UNSIGNED;
"float"         return Parser::Token_FLOAT;
"double"        return Parser::Token_DOUBLE;
"union"         return Parser::Token_UNION;
"asm"           return Parser::Token_ASM;
"__asm__"       return Parser::Token_ASM;
"__extension__" return Parser::Token_EXTENSION;
"__inline"      return Parser::Token_INLINE;
"inline"        return Parser::Token_INLINE;

 /* characters and strings (C with unicode-support) */

 /* The first rule of each pair matches a well-formed literal.  The second is
  * a deliberately looser pattern that catches malformed or unterminated
  * literals, so they are reported with a warning and still yield a token. */
[']({Escape}|{Multibyte}|[^\\\n\'])[']    return Parser::Token_X_CONSTANT;
[']({Escape}|{Multibyte}|[\\][^\\\n\']|[^\\\n\'])*([\\]?[\n]|[']) {
    qWarning() << QString("Invalid character literal: %1").arg(yytext);
    return Parser::Token_X_CONSTANT;
}

["]({Escape}|{Multibyte}|[^\\\n\"])*["]  return Parser::Token_STRING_LITERAL;
["]({Escape}|{Multibyte}|[\\][^\\\n\"]|[^\\\n\"])*([\\]?[\n]|["]) {
    qWarning() << QString("Invalid string literal: %1").arg(yytext);
    return Parser::Token_STRING_LITERAL;
}


 /* identifiers and number literals */

{Letter}({Letter}|{Digit})*  return Parser::Token_IDENTIFIER;

{IntegerLiteral}   return Parser::Token_X_CONSTANT;
{FloatingPoint}    return Parser::Token_X_CONSTANT;


 /* everything else is not a valid lexeme */

.               {
    qWarning() << "INVALID TOKEN";
    exit(-1);
}

%%

namespace cc
{

/**
 * Creates a lexer and immediately points it at @p contents.
 *
 * @param parser   parser whose token stream supplies the location table
 * @param contents NUL-terminated buffer to scan (the end of input is
 *                 detected by the terminating 0 byte in LexerInput())
 */
Lexer::Lexer( Parser *parser, char *contents )
{
    restart( parser, contents );
}

/**
 * Resets all scanner state so that scanning starts at the beginning of
 * @p contents.
 *
 * @param parser   parser whose token stream supplies the location table
 * @param contents NUL-terminated buffer to scan
 */
void Lexer::restart( Parser *parser, char *contents )
{
    m_parser = parser;
    m_locationTable = parser->tokenStream->locationTable();
    m_contents = contents;
    m_tokenBegin = m_tokenEnd = 0;
    m_currentOffset = 0;

    // check for and ignore the UTF-8 byte order mark
    // (the && chain stops at the first mismatch, so a buffer shorter than
    // three bytes is never read beyond its terminating NUL)
    unsigned char *ucontents = (unsigned char *) m_contents;
    if ( ucontents[0] == 0xEF && ucontents[1] == 0xBB && ucontents[2] == 0xBF )
    {
        m_tokenBegin = m_tokenEnd = 3;
        m_currentOffset = 3;
    }

    yyrestart(NULL);
    BEGIN(INITIAL); // is not set automatically by yyrestart()
}

/**
 * Input hook of the flex C++ scanner: hands out the buffer one character
 * at a time.
 *
 * Line breaks are normalised so the rules only ever see '\n': a lone '\r'
 * and a "\r\n" pair are both delivered as a single '\n'.  Every line break
 * is recorded in the location table.
 *
 * @param buf receives the character that was read
 * @return 1 if a character was stored in @p buf, 0 at the end of the buffer
 */
int Lexer::LexerInput( char *buf, int /*max_size*/ )
{
    int c = m_contents[ m_currentOffset ];

    // End of input: stay on the terminating NUL instead of stepping over it,
    // so that a repeated call cannot read beyond the end of the buffer.
    if ( c == 0 )
        return 0;

    m_currentOffset++;

    switch(c)
    {
    case '\r':
        c = '\n'; // only have one single line break character: '\n'
        // m_currentOffset already refers to the character after the '\r',
        // so that is the one to test for the second half of a "\r\n" pair.
        if ( m_contents[m_currentOffset] == '\n' )
        {
            m_currentOffset++;
            // NOTE(review): presumably compensates for the byte the scanner
            // never sees, keeping token offsets aligned with the buffer
            m_tokenEnd++;
        }

        // fall through
    case '\n':
        m_locationTable->newline( m_currentOffset - 1 );
        break;

    default:
        break;
    }

    buf[0] = c;
    return 1;
}

} // end of namespace cc