Warning, /utilities/kregexpeditor/src/qregexpparser.l is written in an unsupported language. File is not indexed.
/*
 *  Copyright (c) 2002-2003 Jesper K. Pedersen <blackie@kde.org>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License version 2 as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 **/
%option noyywrap
%option nounput
%option reentrant
%option bison-bridge

%{

#include <climits>

#include "textrangeregexp.h"
#include "gen_qregexpparser.hh"
#include "qregexpparsercommon.h"

/* Helpers defined in the user code section at the end of this file. */
void parseRange( const char* txt, int* min, int* max );
RegExp* parseCharClass( const char* match );
%}

/*
 * Named patterns used by the rules below.
 *
 *   Escape      a backslash followed by any single character
 *   BackRef     a backslash followed by a decimal number not starting with 0
 *   CharClass   a bracket expression; a closing bracket is only accepted as
 *               the first member, so an escaped closing bracket later in the
 *               expression still terminates the token
 *   Range       a brace quantifier with optional minimum and maximum
 *   HexChar     backslash-x followed by one to four hexadecimal digits
 *   OctChar     backslash-0 followed by one to four octal digits
 *   SpecialEsc  one of the escapes a, f, n, r, t, v
 *
 * Rule order matters: the more specific escape rules are listed before the
 * generic Escape rule so that they win when the match lengths are equal.
 */
Escape   \\.
BackRef  \\[1-9][0-9]*
CharClass \[^?\]?[^]]*\]
Range \{[0-9]*(,[0-9]*)?\}
HexChar \\x[0-9a-fA-F]{1,4}
OctChar \\0[0-7]{1,4}
SpecialEsc \\[afnrtv]
%%
"\\b"  return TOK_PosWordChar;
"\\B"  return TOK_PosNonWordChar;
"\\d"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setDigit( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"\\D"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setNonDigit( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"\\s"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setSpace( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"\\S"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setNonSpace( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"\\w"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setWordChar( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"\\W"  {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->setNonWordChar( true );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
{SpecialEsc} {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->addCharacter( QString::fromLocal8Bit( yytext ) );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }

{HexChar} {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->addCharacter( QString::fromLocal8Bit(yytext) );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
{OctChar} {
         TextRangeRegExp* regexp = new TextRangeRegExp( false );
         regexp->addCharacter( QString::fromLocal8Bit(yytext) );
         yylval->regexp = regexp;
         return TOK_CharClass;
       }
"."    return TOK_Dot;
"$"    return TOK_Dollar;
"^"    return TOK_Carat;
"(?:"  return TOK_MagicLeftParent;
"(?="  return TOK_PosLookAhead;
"(?!"  return TOK_NegLookAhead;
"("    return TOK_LeftParen;
")"    return TOK_RightParent;
"|"    return TOK_Bar;
"*"    { yylval->range.min = 0; yylval->range.max=-1; return TOK_Quantifier; }
"?"    { yylval->range.min = 0; yylval->range.max=1;  return TOK_Quantifier; }
"+"    { yylval->range.min = 1; yylval->range.max=-1; return TOK_Quantifier; }
{Range}     { parseRange( yytext, &yylval->range.min, &yylval->range.max ); return TOK_Quantifier; }
{CharClass} { yylval->regexp = parseCharClass(yytext); return TOK_CharClass; }
{BackRef}   { yylval->backRef = atoi( yytext+1 ); return TOK_BackRef; }
{Escape}    { yylval->ch = yytext[1]; return TOK_EscapeChar; }
.           { yylval->ch = yytext[0]; return TOK_Char; }

%%

/**
   Creates a reentrant scanner in *scanner, attaches @p context as its
   "extra" data and makes it scan the Latin-1 encoding of @p qstr.

   A null @p qstr is scanned as the empty string. The scanner must be
   released with scannerDestroy().
*/
void scannerInit( yyscan_t *scanner, struct parse_context *context, const QString& qstr )
{
  yylex_init( scanner );
  yyset_extra( context, *scanner );
  QByteArray cstr;
  if ( !qstr.isNull() )
    cstr = qstr.toLatin1();
  // yy_scan_string() copies the bytes, so cstr may go out of scope afterwards.
  yy_switch_to_buffer( yy_scan_string( cstr.constData(), *scanner ), *scanner );
}

/**
   Releases a scanner previously set up with scannerInit().
*/
void scannerDestroy( yyscan_t scanner )
{
  yylex_destroy( scanner );
}

/**
   Appends the decimal digit character @p digit to @p value and returns the
   result, saturating at INT_MAX instead of overflowing (signed overflow is
   undefined behaviour and the digit run comes straight from user input).
*/
static int appendDigit( int value, char digit )
{
  const int d = digit - '0';
  if ( value > ( INT_MAX - d ) / 10 )
    return INT_MAX;
  return value * 10 + d;
}

/**
   This function parses a range in a form similar to "{3,4}", "{,7}"
   etc. and returns the value in the integers pointed to by min and max.

   @param txt  the text matched by the Range pattern, i.e. it starts with
               '{', ends with '}' and contains only digits and at most one
               comma in between. No other input is supported.
   @param min  receives the lower bound (0 when none is given).
   @param max  receives the upper bound, or -1 when there is none.
*/
void parseRange( const char* txt, int* min, int* max )
{

  /*
        case  txt   min  max
        1     {}    0    -1
        2     {,}   0    -1
        3     {5}   5    5
        4     {5,}  5    -1
        5     {,7}  0    7
        6     {5,7} 5    7
  */
  char c;
  int i = 1;   // index 0 is the opening '{'
  int minimum = 0, maximum = 0;
  bool minFound = false, maxFound = false, commaFound = false;

  // Digits up to the comma or the closing brace form the minimum.
  while ( (c = txt[i++]) != ',' && c != '}' ) {
    minimum = appendDigit( minimum, c );
    minFound = true;
  }

  if ( c == ',' )
    commaFound = true;

  // Digits between the comma and the closing brace form the maximum.
  if ( c != '}' ) {
    while ( (c = txt[i++]) != '}' ) {
      maximum = appendDigit( maximum, c );
      maxFound = true;
    }
  }

  *min = minimum;
  if ( maxFound )
    *max = maximum; /* case 5,6 */
  else if ( !minFound )
    *max = -1;      /* case 1,2 */
  else if ( commaFound )
    *max = -1;      /* case 4 */
  else
    *max = minimum; /* case 3 */
}


/**
   Collects up to four digits from @p txt starting at @p pos and advances
   @p pos past them. Hexadecimal digits are accepted when @p hex is true,
   octal digits otherwise. Stops at the first other character or at the end
   of @p txt, so it never reads outside the string.
*/
static QString takeEscapeDigits( const QString& txt, int& pos, bool hex )
{
  QString digits;
  while ( digits.length() < 4 && pos < txt.length() ) {
    const QChar c = txt.at( pos );
    const int u = c.unicode();
    const bool accepted = hex
      ? ( ( u >= '0' && u <= '9' ) || ( u >= 'a' && u <= 'f' ) || ( u >= 'A' && u <= 'F' ) )
      : ( u >= '0' && u <= '7' );
    if ( !accepted )
      break;
    digits += c;
    ++pos;
  }
  return digits;
}

/**
   This function parses a character range like "[^ab1-4]".

   @param match  the text matched by the CharClass pattern, including the
                 surrounding brackets.
   @return a newly allocated TextRangeRegExp describing the class. The
           pointer is handed to the caller; nothing in this file deletes it.

   Single characters are added with addCharacter(), "a-b" pairs with
   addRange(), and the shorthand classes \d \D \s \S \w \W set the
   corresponding flag. The escapes \a \f \n \r \t \v, \xHHHH and \0OOO are
   stored in their escaped textual form; any other escaped character is
   stored as the character itself. A backslash that is the last character
   inside the brackets is stored as a literal backslash.
*/
RegExp* parseCharClass( const char* match )
{
  TextRangeRegExp* res = new TextRangeRegExp( false );
  // scannerInit() feeds the scanner Latin-1 bytes, so decode them as Latin-1.
  QString txt = QString::fromLatin1( match );
  if ( txt.length() <= 2 )
    return res;   // "[]": nothing between the brackets
  txt = txt.mid( 1, txt.length()-2 );

  int i = 0;
  QChar ch = txt.at(i++);
  QString pendingChar;
  QString thisChar;
  bool charPending = false;
  bool rangePending = false;
  bool flushPending = false;

  // A leading '^' negates the class, but only if something follows it.
  if ( i < txt.length() && ch == QLatin1Char('^') ) {
    res->setNegate( true );
    ch = txt.at(i++);
  }

  do {
    // If a character is pending, and the next char is '-' then we are
    // possibly looking at a range. A '-' at the very end is a plain char.
    if ( i < txt.length() && ch == QLatin1Char('-') && charPending ) {
      rangePending = true;
      ch = txt.at(i++);
      continue;
    }

    // If we have a pending character, but do not also have a pending
    // range, then the pending character was not part of a range, and
    // should therefore just be added as a single character.
    if ( charPending && !rangePending ) {
      res->addCharacter( pendingChar );
      charPending = false;
    }

    // Only treat the backslash as an escape when a character follows it;
    // otherwise it falls through to the plain-character branch below.
    if ( ch == QLatin1Char('\\') && i < txt.length() ) {
      // Handle the cases where an escape character is specified.
      ch = txt.at(i++);

      if ( ch == QLatin1Char('a') || ch == QLatin1Char('f') || ch == QLatin1Char('n') ||
           ch == QLatin1Char('r') || ch == QLatin1Char('t') || ch == QLatin1Char('v') ) {
        // These are just seen as normal characters.
        thisChar = QString::fromLatin1("\\") + ch;
      }
      else if ( ch == QLatin1Char('d') ) {
        // The following characters represent character groups. If any of
        // these are seen in a range, then the range is ignored, thus [a-\s]
        // matches an 'a', a '-', and a space (\s means space).
        res->setDigit( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('D') ) {
        res->setNonDigit( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('s') ) {
        res->setSpace( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('S') ) {
        res->setNonSpace( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('w') ) {
        res->setWordChar( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('W') ) {
        res->setNonWordChar( true );
        flushPending = true;
      }
      else if ( ch == QLatin1Char('x') || ch == QLatin1Char('X') ) {
        // This is a hexadecimal character: \xHHHH
        thisChar = QString::fromLatin1("\\x") + takeEscapeDigits( txt, i, true );
      }
      else if ( ch == QLatin1Char('0') ) {
        // This is an octal character: \0OOO. Keep the octal prefix so the
        // digits are not reinterpreted as hexadecimal.
        thisChar = QString::fromLatin1("\\0") + takeEscapeDigits( txt, i, false );
      }
      else {
        // Anything else escaped just means the character itself.
        thisChar = ch;
      }
    }
    else {
      // A non escaped character (or a trailing backslash).
      thisChar = ch;
    }

    // The characters \s,\S,\w,\W,\d or \D, can not be part of a range,
    // thus if they are met in what looks like a range, then the
    // characters of the range are just seen as normal non range
    // characters. Thus [a-\s] matches an 'a', a '-', and a space (\s means
    // space).
    if ( flushPending ) {
      if ( charPending )
        res->addCharacter( pendingChar );
      if ( rangePending )
        res->addCharacter( QString::fromLatin1("-") );
      flushPending = false;
      charPending = false;
      rangePending = false;
    }
    else {
      if ( rangePending ) {
        res->addRange( pendingChar, thisChar );
        charPending = false;
        rangePending = false;
      }
      else {
        pendingChar = thisChar;
        charPending = true;
      }
    }
    if ( i == txt.length() )
      break;
    ch = txt.at(i++);
  }
  while ( ch != QLatin1Char(']') );

  // Whatever is still pending at the end was not completed into a range.
  if ( charPending )
    res->addCharacter( pendingChar );
  if ( rangePending )
    res->addCharacter( QString::fromLatin1("-") );

  return res;
}