Warning, /kdevelop/kdev-ruby/parser/parser.y is written in an unsupported language. File is not indexed.
/* This file is part of KDevelop
 *
 * This file is based on the file parse.y from the MRI, version 1.9.2-p136.
 * So, at this point I must recognize the amazing job ruby developers
 * are doing and specially Yukihiro Matsumoto, the Ruby original author
 * and the one who signed parse.y.
 *
 * Copyright (C) 1993-2007 Yukihiro Matsumoto
 * Copyright (C) 2010-2015 Miquel Sabaté Solà <mikisabate@gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */


%{
/* make clang happy */
#ifndef _MSC_VER
extern char *strdup(const char *s);
#endif

/* for alloca */
#ifdef _WIN32
#include <malloc.h>
#elif !defined(__FreeBSD__) /* alloca() on FreeBSD is in stdlib.h (included later) */
#include <alloca.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "node.h"


/* SSIZE is the capacity of parser_t::comment_stack; LSIZE is four times that. */
#define SSIZE 256
#define LSIZE (SSIZE << 2)


/* The state bits, as defined below, have been extracted from the MRI. */
enum lex_state_bits {
    EXPR_BEG_bit,    /* ignore newline, +/- is a sign. */
    EXPR_END_bit,    /* newline significant, +/- is an operator. */
    EXPR_ENDARG_bit, /* ditto, and unbound braces. */
    EXPR_ENDFN_bit,  /* ditto, and unbound braces. */
    EXPR_ARG_bit,    /* newline significant, +/- is an operator. */
    EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
    EXPR_MID_bit,    /* newline significant, +/- is an operator. */
    EXPR_FNAME_bit,  /* ignore newline, no reserved words. */
    EXPR_DOT_bit,    /* right after `.' or `::', no reserved words. */
    EXPR_CLASS_bit,  /* immediate after `class', no here document. */
    EXPR_VALUE_bit,  /* alike EXPR_BEG but label is disallowed. */
};

/*
 * This enum defines the states in which the lexer can be. Every state is
 * a single-bit mask derived from the matching lex_state_bits entry, so
 * states can be OR-ed together (see the *_ANY groups) and tested with '&'.
 */
enum lex_state_e {
#define DEF_EXPR(n) EXPR_##n = (1 << EXPR_##n##_bit)
    DEF_EXPR(BEG),
    DEF_EXPR(END),
    DEF_EXPR(ENDARG),
    DEF_EXPR(ENDFN),
    DEF_EXPR(ARG),
    DEF_EXPR(CMDARG),
    DEF_EXPR(MID),
    DEF_EXPR(FNAME),
    DEF_EXPR(DOT),
    DEF_EXPR(CLASS),
    DEF_EXPR(VALUE),
    EXPR_BEG_ANY = (EXPR_BEG | EXPR_VALUE | EXPR_MID | EXPR_CLASS),
    EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG),
    EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN)
};

/*
 * Helper macros for handling the lexer states. Both yield a non-zero value
 * when the tested state shares at least one bit with the given mask.
 */
#define IS_lex_state_for(x, ls) ((x) & (ls))
#define IS_lex_state(ls) IS_lex_state_for(lex_state, (ls))

/*
 * And now some macros that will help us on some stacks of the parser.
 * A "bit stack" is an unsigned integer used as a stack of booleans, with
 * the top of the stack stored in the lowest bit:
 *   - PUSH shifts the stack left and stores the low bit of n on top.
 *   - POP shifts the stack right, dropping the top.
 *   - LEXPOP shifts the stack right but ORs the old top into the new top.
 *   - SET_P tests the top.
 */
#define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1))
#define BITSTACK_POP(stack) ((stack) = (stack) >> 1)
#define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1))
#define BITSTACK_SET_P(stack) ((stack)&1)

/* Bit stack accessors bound to parser->cond_stack. */
#define COND_PUSH(n) BITSTACK_PUSH(parser->cond_stack, (n))
#define COND_POP() BITSTACK_POP(parser->cond_stack)
#define COND_LEXPOP() BITSTACK_LEXPOP(parser->cond_stack)
#define COND_P() BITSTACK_SET_P(parser->cond_stack)

/* Bit stack accessors bound to parser->cmdarg_stack. */
#define CMDARG_PUSH(n) BITSTACK_PUSH(parser->cmdarg_stack, (n))
#define CMDARG_POP() BITSTACK_POP(parser->cmdarg_stack)
#define CMDARG_LEXPOP() BITSTACK_LEXPOP(parser->cmdarg_stack)
#define CMDARG_P() BITSTACK_SET_P(parser->cmdarg_stack)


/*
 * This structure represents a string/heredoc/regexp/shortcut term.
 */
struct term_t {
    int token;
    char *word;
    int nest;
    unsigned char term;
    unsigned char paren;
    unsigned char can_embed : 1;
    unsigned char nestable : 1;
};

/*
 * This structure contains a comment on the code. It basically stores
 * the comment itself in a dynamically allocated char pointer, and the
 * line where it was found.
 */
struct comment_t {
    char *comment;
    int line;
};

/*
 * This structure defines all the information that the parser has.
 * It contains the AST, flags, stacks, etc.
 */
struct parser_t {
    /* Abstract Syntax Tree */
    struct Node *ast;

    /* Stack of positions */
    struct pos_t *pos_stack;
    int stack_scale;
    int pos_size;

    /* Flags used by the parser */
    unsigned char eof_reached : 1;
    unsigned int cond_stack;    /* bit stack, see COND_PUSH and friends */
    unsigned int cmdarg_stack;  /* bit stack, see CMDARG_PUSH and friends */
    int in_def;                 /* incremented/decremented around tDEF bodies */
    int paren_nest;
    int lpar_beg;
    int parser_command_start;   /* accessed through the command_start macro */
    enum ruby_version version;

    /* Stuff from the lexer */
    enum lex_state_e lex_state;
    struct term_t *lex_strterm;
    char *lex_p;
    char *lex_prev;
    char *lex_pend;
    unsigned long lex_prevc;

    /* Basically used to handle heredocs properly */
    unsigned long line_pend;
    unsigned long column_pend;
    unsigned char here_found : 1;

    /* Errors on the file */
    struct error_t *errors;
    struct error_t *last_error;
    unsigned char warning : 1;  /* set by yywarning() around its yyerror() call */
    unsigned char unrecoverable : 1;

    /* Stack of names */
    char *stack[2];
    char *aux;                  /* written by copy_op() */
    int sp;

    /* The last allocated comment + the comment stack */
    struct comment_t last_comment;
    char *comment_stack[SSIZE];
    int comment_index;

    /* Info about the content to parse */
    unsigned long length;
    unsigned long line;
    unsigned long column;
    unsigned char content_given : 1;
    char *blob;
};

#include "parser_gen.h"
#define yyparse ruby_yyparse
#define YYERROR_VERBOSE 1

/*
 * Macros to access some attributes in a fancier way. They only work where
 * a variable called `parser' (a struct parser_t pointer) is in scope.
 */
#define lex_strterm parser->lex_strterm
#define lex_state parser->lex_state
#define command_start parser->parser_command_start

/* yy's functions */
#if YYPURE
static int yylex(void *, void *);
#else
static int yylex(void *);
#endif
static void yyerror(struct parser_t *, const char *);

/*
 * Report msg through yyerror() with parser->warning raised for the duration
 * of the call. NOTE(review): this expands to a bare brace block rather than
 * a do/while(0), so it must not be used as the unbraced body of an `if' that
 * has an `else'.
 */
#define yywarning(msg) { parser->warning = 1; yyerror(parser, (msg)); parser->warning = 0;}

/* The static functions below deal with stacks. */
static void pop_stack(struct parser_t *parser, struct Node *n);
static void push_last_comment(struct parser_t *parser);
static void pop_comment(struct parser_t *parser, struct Node *n);
static void pop_pos(struct parser_t *parser, struct Node *n);
static void pop_start(struct parser_t *parser, struct Node *n);
static void pop_end(struct parser_t *parser, struct Node *n);

/*
 * Helper macros for nodes, positions and stacks.
 *
 * ALLOC_N deliberately expands to TWO statements: the alloc_node() call and
 * then a pop_pos() on yyval.n. It is therefore only meaningful as the right
 * hand side of an assignment to the rule value written as a full statement
 * inside a braced action; it is not an expression.
 *
 * DISPOSE2/DISPOSE3 free two/three ASTs, POP_STACK and discard_pos() are
 * shorthands over pop_stack()/pop_pos(), and copy_op() stores a strdup'ed
 * copy of the operator text in parser->aux.
 */
#define ALLOC_N(kind, l, r) alloc_node(kind, l, r); pop_pos(parser, yyval.n);
#define DISPOSE2(node1, node2) { free_ast(node1); free_ast(node2); }
#define DISPOSE3(node1, node2, node3) { DISPOSE2(node1, node2); free_ast(node3); }
#define POP_STACK pop_stack(parser, yyval.n)
#define discard_pos() pop_pos(parser, NULL)
#define copy_op(op) { parser->aux = strdup(op); }
%}

%pure-parser
%lex-param {struct parser_t *parser }
%parse-param { struct parser_t *parser }
%union {
    struct Node *n;
    int num;
    struct term_t *term;
}

/* Tokens */
%token tCLASS tMODULE tDEF tUNDEF tBEGIN tRESCUE tENSURE tEND tIF tUNLESS
%token tTHEN tELSIF tELSE tCASE tWHEN tWHILE tUNTIL tFOR tBREAK tNEXT tREDO
%token tRETRY tIN tDO tDO_COND tDO_BLOCK tRETURN tYIELD tKWAND tKWOR tKWNOT
%token tALIAS tDEFINED upBEGIN upEND tTRUE tFALSE tNIL tENCODING tDSTAR
%token tFILE tLINE tSELF tSUPER GLOBAL BASE CONST tDO_LAMBDA tCHAR tIMAGINARY
%token IVAR CVAR tINTEGER tFLOAT tNTH_REF tBACKTICK tpEND tSYMBEG tRATIONAL
%token tAMPER tAREF tASET tASSOC tCOLON2 tCOLON3 tLAMBDA tLAMBEG tLBRACE
%token tLBRACKET tLPAREN tLPAREN_ARG tSTAR tCOMMENT ARRAY tKEY SYMBOL tUMINUS_NUM
%token tSTRING_BEG tSTRING_CONTENT tSTRING_DBEG tSTRING_DEND tSTRING_END tSTRING_DVAR

/* Types */
%type <n> singleton strings string literal numeric cpath rescue_arg
%type <n> top_compstmt top_stmt bodystmt compstmt stmts stmt expr arg primary
%type <n> command command_call method_call if_tail opt_else case_body cases
%type <n> opt_rescue exc_list exc_var opt_ensure args call_args opt_call_args
%type <n> paren_args opt_paren_args super aref_args opt_block_arg block_arg
%type <n> mrhs superclass block_call block_command f_block_optarg f_block_opt
%type <n> const f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list
%type <n> f_margs assoc_list assocs assoc undef_list backref for_var bvar base
%type <n> block_param opt_block_param block_param_def f_opt bv_decls label none
%type <n> lambda f_larglist lambda_body command_args opt_bv_decl lhs do_block
%type <n> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner
%type <n> fsym variable symbol operation operation2 operation3 other_vars
%type <n> cname fname f_rest_arg f_block_arg opt_f_block_arg f_norm_arg
%type <n> brace_block cmd_brace_block f_bad_arg sym opt_brace_block
%type <n> opt_args_tail args_tail f_kwarg block_args_tail opt_block_args_tail
%type <n> f_kw f_block_kw f_block_kwarg f_kwrest simple_numeric
%type <n> string_contents string_content string_dvar

/* When an error has been found, free all the nodes from bison's stacks */
%destructor { free_ast($$); } <n>

/* precedence table */
%nonassoc tLOWEST
%nonassoc tLBRACE_ARG

%nonassoc modifier_if modifier_unless modifier_while modifier_until
%left tKWOR tKWAND
%right tKWNOT
%nonassoc tDEFINED
%right '=' tOP_ASGN
%left modifier_rescue
%right '?' ':'
%nonassoc tDOT2 tDOT3
%left tOR
%left tAND
%nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
%left '>' tGEQ '<' tLEQ
%left '|' '^'
%left '&'
%left tLSHIFT tRSHIFT
%left '+' '-'
%left '*' '/' '%'
%right tUMINUS_NUM tUMINUS
%right tPOW
%right '!' '~' tUPLUS

%%

/*
 * Entry point. The parser accepts after a single top-level statement (or a
 * lone terminator); the statement's AST is handed over through parser->ast.
 */
top_compstmt: top_stmt { parser->ast = $1; $$ = 0; YYACCEPT; }
    | term { $$ = 0; YYACCEPT; }
;

top_stmt: none
    | stmt
    | error stmt { $$ = $2; }
;

/*
 * Body of a begin/class/module/def construct. The two empty mid-rule actions
 * record the line and column at which the body starts, so that they can be
 * written into the resulting node once it has been allocated.
 */
bodystmt:
    {
        $<num>$ = parser->line;
    }
    {
        $<num>$ = parser->column;
    }
    compstmt opt_rescue opt_else opt_ensure
    {
        $$ = alloc_ensure(token_body, $3, $4, $5, $6);
        pop_end(parser, $$); /* Every bodystmt ends with tEND */
        $$->pos.start_line = $<num>1;
        $$->pos.start_col = $<num>2;
    }
;

compstmt: stmts opt_terms { $$ = $1; }
;

/* Statement list; an empty head is replaced by the first real statement. */
stmts: none
    | stmt
    | stmts terms stmt { $$ = ($1 == NULL) ? $3 : update_list($1, $3); }
    | error stmt { $$ = $2; }
;

/* Statements: alias/undef, modifiers, BEGIN/END blocks and assignments. */
stmt: tALIAS fsym { lex_state = EXPR_FNAME; } fsym
    {
        $$ = alloc_node(token_alias, $2, $4);
    }
    | tALIAS GLOBAL GLOBAL
    {
        /* Ugly as hell, but it works */
        struct Node *l = alloc_node(token_object, NULL, NULL);
        l->flags = global;
        struct Node *r = alloc_node(token_object, NULL, NULL);
        r->flags = global;
        pop_pos(parser, r);
        pop_pos(parser, l);
        pop_stack(parser, l);
        pop_stack(parser, r);
        $$ = alloc_node(token_alias, l, r);
    }
    | tALIAS GLOBAL tNTH_REF
    {
        yyerror(parser, "can't make alias for the number variables");
        $$ = 0;
    }
    | tUNDEF undef_list
    {
        $$ = alloc_node(token_undef, NULL, $2);
    }
    | stmt modifier_if expr
    {
        $$ = alloc_cond(token_if, $3, $1, NULL);
    }
    | stmt modifier_unless expr
    {
        $$ = alloc_cond(token_unless, $3, $1, NULL);
    }
    | stmt modifier_while expr
    {
        $$ = alloc_cond(token_while, $3, $1, NULL);
    }
    | stmt modifier_until expr
    {
        $$ = alloc_cond(token_until, $3, $1, NULL);
    }
    | stmt modifier_rescue stmt
    {
        $$ = alloc_cond(token_rescue, $3, $1, NULL);
    }
    | upBEGIN
    {
        if (parser->in_def)
            yyerror(parser, "BEGIN in method");
    }
    '{' compstmt '}'
    {
        $$ = alloc_node(token_up_begin, $4, NULL);
        discard_pos(); /* } */
        discard_pos(); /* { */
    }
    | upEND '{' compstmt '}'
    {
        $$ = alloc_node(token_up_end, $3, NULL);
        discard_pos(); /* } */
        discard_pos(); /* { */
    }
    | lhs '=' command_call { $$ = alloc_node(token_assign, $1, $3); }
    | mlhs '=' command_call { $$ = alloc_node(token_assign, $1, $3); }
    | variable tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
    | primary '[' opt_call_args rbracket tOP_ASGN command_call
    {
        struct Node *aux = alloc_node(token_array_value, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $6);
    }
    | primary '.' base tOP_ASGN command_call
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | primary '.' const tOP_ASGN command_call
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | primary tCOLON2 const tOP_ASGN command_call
    {
        /* Rejected: the operands are freed here because no node keeps them. */
        yyerror(parser, "constant re-assignment");
        $$ = NULL;
        DISPOSE3($1, $3, $5);
    }
    | primary tCOLON2 base tOP_ASGN command_call
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | backref tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
    | lhs '=' mrhs { $$ = alloc_node(token_assign, $1, $3); }
    | mlhs '=' arg { $$ = alloc_node(token_assign, $1, $3); }
    | mlhs '=' mrhs { $$ = alloc_node(token_assign, $1, $3); }
    | expr
    | tpEND { $$ = alloc_node(token__end__, NULL, NULL); }
;

/* Expressions joined by the keyword operators and/or/not. */
expr: command_call
    | expr tKWAND expr { $$ = alloc_node(token_kw_and, $1, $3); }
    | expr tKWOR expr { $$ = alloc_node(token_kw_or, $1, $3); }
    | tKWNOT opt_eol expr { $$ = alloc_node(token_kw_not, $3, NULL); }
    | '!' command_call { $$ = alloc_node(token_not, $2, NULL); }
    | arg
;

command_call: command | block_command
;

block_command: block_call
    | block_call '.' operation2 command_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | block_call tCOLON2 operation2 command_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
;

cmd_brace_block: tLBRACE_ARG opt_block_param compstmt '}'
    {
        /* ALLOC_N expands to the allocation plus a pop_pos() statement. */
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
;

/* Method calls written without parentheses, plus super/yield/return/etc. */
command: operation command_args %prec tLOWEST
    {
        $$ = alloc_node(token_method_call, $1, $2);
    }
    | operation command_args cmd_brace_block
    {
        $$ = alloc_cond(token_method_call, $3, $1, $2);
    }
    | primary '.' operation2 command_args %prec tLOWEST
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary '.' operation2 command_args cmd_brace_block
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_cond(token_method_call, $5, aux, $4);
    }
    | primary tCOLON2 operation2 command_args %prec tLOWEST
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary tCOLON2 operation2 command_args cmd_brace_block
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_cond(token_method_call, $5, aux, $4);
    }
    | tSUPER call_args { $$ = alloc_node(token_method_call, $2, NULL); }
    | tYIELD call_args { $$ = alloc_node(token_yield, $2, NULL); }
    | tRETURN call_args { $$ = alloc_node(token_return, $2, NULL); }
    | tBREAK call_args { $$ = alloc_node(token_break, $2, NULL); }
    | tNEXT call_args { $$ = alloc_node(token_next, $2, NULL); }
;

/* Left hand side of a multiple assignment. */
mlhs: mlhs_basic
    | tLPAREN mlhs_inner rparen { $$ = $2; }
;

mlhs_inner: mlhs_basic
    | tLPAREN mlhs_inner rparen { $$ = $2; }
;

mlhs_basic: mlhs_head
    | mlhs_head mlhs_item { $$ = update_list($1, $2); }
    | mlhs_head tSTAR mlhs_node
    {
        $3->flags = kwrest;
        $$ = update_list($1, $3);
    }
    | mlhs_head tSTAR mlhs_node ',' mlhs_post
    {
        $3->flags = kwrest;
        $$ = concat_list($1, update_list($3, $5));
    }
    | mlhs_head tSTAR
    {
        /* An anonymous splat is represented by an empty object node. */
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = star;
        $$ = update_list($1, $$);
    }
    | mlhs_head tSTAR ',' mlhs_post
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = star;
        $$ = update_list($1, $$);
        $$ = concat_list($$, $4);
    }
    | tSTAR mlhs_node { $$ = $2; $$->flags = kwrest; }
    | tSTAR mlhs_node ',' mlhs_post { $$ = update_list($2, $4); $2->flags = kwrest; }
    | tSTAR
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = star;
    }
    | tSTAR ',' mlhs_post
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = star;
        $$ = update_list($$, $3);
    }
;

mlhs_item: mlhs_node
    | tLPAREN mlhs_inner rparen { $$ = alloc_node(token_object, $2, NULL); }
;

mlhs_head: mlhs_item ',' { $$ = $1; }
    | mlhs_head mlhs_item ',' { $$ = update_list($1, $2); }
;

mlhs_post: mlhs_item { $$ = $1; }
    | mlhs_post ',' mlhs_item { $$ = update_list($1, $3); }
;

/* A single target inside a multiple assignment. */
mlhs_node: variable
    | primary '[' opt_call_args rbracket
    {
        $$ = alloc_node(token_array_value, $1, $3);
    }
    | primary '.' base { $$ = alloc_node(token_method_call, $1, $3); }
    | primary tCOLON2 base { $$ = alloc_node(token_method_call, $1, $3); }
    | primary '.' const { $$ = alloc_node(token_method_call, $1, $3); }
    | primary tCOLON2 const
    {
        if (parser->in_def)
            yyerror(parser, "dynamic constant assignment");
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | tCOLON3 const
    {
        if (parser->in_def)
            yyerror(parser, "dynamic constant assignment");
        $$ = $2;
    }
    | backref
;

/* Target of a simple assignment; same shapes as mlhs_node minus backref. */
lhs: variable
    | primary '[' opt_call_args rbracket
    {
        $$ = alloc_node(token_array_value, $1, $3);
    }
    | primary '.' base { $$ = alloc_node(token_method_call, $1, $3); }
    | primary tCOLON2 base { $$ = alloc_node(token_method_call, $1, $3); }
    | primary '.' const { $$ = alloc_node(token_method_call, $1, $3); }
    | primary tCOLON2 const
    {
        if (parser->in_def)
            yyerror(parser, "dynamic constant assignment");
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | tCOLON3 const
    {
        if (parser->in_def)
            yyerror(parser, "dynamic constant assignment");
        $$ = $2;
    }
;

cname: BASE
    {
        yyerror(parser, "class/module name must be CONSTANT");
        $$ = 0;
    }
    | const
;

cpath: tCOLON3 cname { $$ = $2; }
    | cname { $$ = $1; }
    | primary tCOLON2 cname { $$ = update_list($1, $3); }
;

/*
 * Name of a method being defined. For operators the text comes from
 * parser->aux (filled in by the `op' rule) and the position is computed
 * backwards from the current line/column.
 */
fname: base
    | const
    | op
    {
        lex_state = EXPR_ENDFN;
        $$ = alloc_node(token_object, NULL, NULL);
        $$->name = parser->aux;
        $$->pos.start_line = $$->pos.end_line = parser->line;
        $$->pos.end_col = parser->column;
        $$->pos.start_col = $$->pos.end_col - strlen(parser->aux);
    }
    | reswords
    {
        lex_state = EXPR_ENDFN;
        $$ = alloc_node(token_object, NULL, NULL);
    }
;

fsym: fname | symbol
;

undef_list: fsym
    | undef_list ',' { lex_state = EXPR_FNAME; } fsym { $$ = update_list($1, $4); }
;

/* Operators usable as method names; each one copies its text to parser->aux. */
op: '|' { copy_op("|"); } | '^' { copy_op("^"); } | '&' { copy_op("&"); }
    | tCMP { copy_op("<=>"); } | tEQ { copy_op("=="); } | tEQQ { copy_op("==="); }
    | tMATCH { copy_op("=~"); } | tNMATCH { copy_op("!~"); } | '>' { copy_op(">"); }
    | tGEQ { copy_op(">="); } | '<' { copy_op("<"); } | tLEQ { copy_op("<="); }
    | tNEQ { copy_op("!="); } | tLSHIFT { copy_op("<<"); } | tRSHIFT { copy_op(">>"); }
    | '+' { copy_op("+"); } | '-' { copy_op("-"); } | '*' { copy_op("*"); }
    | tSTAR { copy_op("*"); } | '/' { copy_op("/"); } | '%' { copy_op("%"); }
    | tPOW { copy_op("**"); } | tAREF { copy_op("[]"); } | '`' { copy_op("`"); }
    | tUPLUS { copy_op("+"); } | tASET { copy_op("[]="); }
    | tUMINUS { copy_op("-"); } | tDSTAR { copy_op("**"); }
    | '!' { copy_op("!"); } | '~' { copy_op("~"); }
;

reswords: tLINE | tFILE | tENCODING | upBEGIN | upEND | tALIAS | tKWAND
    | tBEGIN | tBREAK | tCASE | tCLASS | tDEF | tDEFINED | tDO | tELSE | tELSIF
    | tEND | tENSURE | tFALSE | tFOR | tIN | tMODULE | tNEXT | tNIL | tKWNOT
    | tKWOR | tREDO | tRESCUE | tRETRY | tRETURN | tSELF | tSUPER | tTHEN | tTRUE
    | tUNDEF | tWHEN | tYIELD | tIF | tUNLESS | tWHILE | tUNTIL
;

/* Argument-level expressions: assignments, operators, ranges, ternary. */
arg: lhs '=' arg { $$ = alloc_node(token_assign, $1, $3); }
    | lhs '=' arg modifier_rescue arg
    {
        struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
        $$ = alloc_node(token_assign, $1, aux);
    }
    | variable tOP_ASGN arg { $$ = alloc_node(token_op_assign, $1, $3); }
    | variable tOP_ASGN arg modifier_rescue arg
    {
        struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
        $$ = alloc_node(token_op_assign, $1, aux);
    }
    | primary '[' opt_call_args rbracket tOP_ASGN arg
    {
        struct Node *aux = alloc_node(token_array_value, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $6);
    }
    | primary '.' base tOP_ASGN arg
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | primary '.' const tOP_ASGN arg
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | primary tCOLON2 base tOP_ASGN arg
    {
        struct Node *aux = alloc_node(token_object, $1, $3);
        $$ = alloc_node(token_op_assign, aux, $5);
    }
    | primary tCOLON2 const tOP_ASGN arg
    {
        yyerror(parser, "constant re-assignment");
        $$ = NULL;
        DISPOSE3($1, $3, $5);
    }
    | tCOLON3 const tOP_ASGN arg
    {
        yyerror(parser, "constant re-assignment");
        $$ = NULL;
        DISPOSE2($2, $4);
    }
    /*
     * NOTE(review): every other tOP_ASGN alternative (including the backref
     * one in `stmt') builds a token_op_assign node, this one builds a
     * token_assign. Left untouched; confirm whether that is intended.
     */
    | backref tOP_ASGN arg { $$ = alloc_node(token_assign, $1, $3); }
    | arg tDOT2 arg { $$ = alloc_node(token_dot2, $1, $3); }
    | arg tDOT3 arg { $$ = alloc_node(token_dot3, $1, $3); }
    | arg '+' arg { $$ = alloc_node(token_plus, $1, $3); }
    | arg '-' arg { $$ = alloc_node(token_minus, $1, $3); }
    | arg '*' arg { $$ = alloc_node(token_mul, $1, $3); }
    | arg '/' arg { $$ = alloc_node(token_div, $1, $3); }
    | arg '%' arg { $$ = alloc_node(token_mod, $1, $3); }
    | arg tPOW arg { $$ = alloc_node(token_pow, $1, $3); }
    | tUMINUS_NUM simple_numeric tPOW arg
    {
        /* The power is built first and the unary minus wraps it. */
        struct Node *aux = alloc_node(token_pow, $2, $4);
        $$ = alloc_node(token_unary_minus, aux, NULL);
    }
    | tUPLUS arg { $$ = alloc_node(token_unary_plus, $2, NULL); }
    | tUMINUS arg { $$ = alloc_node(token_unary_minus, $2, NULL); }
    | arg '|' arg { $$ = alloc_node(token_bit_or, $1, $3); }
    | arg '^' arg { $$ = alloc_node(token_bit_xor, $1, $3); }
    | arg '&' arg { $$ = alloc_node(token_bit_and, $1, $3); }
    | arg tCMP arg { $$ = alloc_node(token_cmp, $1, $3); }
    | arg '>' arg { $$ = alloc_node(token_greater, $1, $3); }
    | arg tGEQ arg { $$ = alloc_node(token_geq, $1, $3); }
    | arg '<' arg { $$ = alloc_node(token_lesser, $1, $3); }
    | arg tLEQ arg { $$ = alloc_node(token_leq, $1, $3); }
    | arg tEQ arg { $$ = alloc_node(token_eq, $1, $3); }
    | arg tEQQ arg { $$ = alloc_node(token_eqq, $1, $3); }
    | arg tNEQ arg { $$ = alloc_node(token_neq, $1, $3); }
    | arg tMATCH arg { $$ = alloc_node(token_match, $1, $3); }
    | arg tNMATCH arg { $$ = alloc_node(token_nmatch, $1, $3); }
    | '!' arg { $$ = alloc_node(token_not, $2, NULL); }
    | '~' arg { $$ = alloc_node(token_neg, $2, NULL); }
    | arg tLSHIFT arg { $$ = alloc_node(token_lshift, $1, $3); }
    | arg tRSHIFT arg { $$ = alloc_node(token_rshift, $1, $3); }
    | arg tAND arg { $$ = alloc_node(token_and, $1, $3); }
    | arg tOR arg { $$ = alloc_node(token_or, $1, $3); }
    | tDEFINED opt_eol arg { $$ = alloc_node(token_defined, $3, NULL); }
    | arg '?' arg opt_eol ':' arg
    {
        $$ = alloc_cond(token_ternary, $1, $3, $6);
    }
    | primary
;

aref_args: none
    | args trailer { $$ = $1; }
    | args ',' assocs trailer { $$ = update_list($1, $3); }
    | assocs trailer { $$ = $1; }
;

paren_args: '(' opt_call_args rparen { $$ = $2; }
;

opt_paren_args : none | paren_args
;

opt_call_args: none | call_args
;

/* Call arguments; bare key/value pairs are wrapped into a hash node. */
call_args: command
    | args opt_block_arg { $$ = update_list($1, $2); }
    | assocs opt_block_arg
    {
        struct Node *aux = alloc_node(token_hash, $1, NULL);
        $$ = update_list(aux, $2);
    }
    | args ',' assocs opt_block_arg
    {
        struct Node *aux = alloc_node(token_hash, $3, NULL);
        struct Node *n = update_list(aux, $4);
        $$ = concat_list($1, n);
    }
    | block_arg
;

/*
 * Arguments of a command call. The mid-rule action saves the current
 * cmdarg_stack and pushes a 1; the final action restores the saved stack.
 */
command_args:
    {
        $<num>$ = parser->cmdarg_stack;
        CMDARG_PUSH(1);
    } call_args
    {
        /*
         * Restore from the mid-rule action's own slot (slot 1). Reading the
         * rule value here instead only works because the generated parser
         * happens to preload it with the first slot.
         */
        parser->cmdarg_stack = $<num>1;
        $$ = $2;
    }
;

block_arg: tAMPER arg { $$ = $2; }
;

opt_block_arg: ',' block_arg { $$ = $2; }
    | ',' { $$ = NULL; }
    | none
;

args: arg
    | tSTAR arg { $$ = $2; }
    | args ',' arg { $$ = update_list($1, $3); }
    | args ',' tSTAR arg { $$ = update_list($1, $4); }
;

mrhs: args ',' arg { $$ = update_list($1, $3); }
    | args ',' tSTAR arg { $$ = update_list($1, $4); }
    | tSTAR arg { $$ = $2; }
;

/*
 * Primary expressions: literals, grouping, collections, control structures
 * and class/module/method definitions. Most constructs closed by tEND call
 * discard_pos() to drop the position recorded for that keyword.
 */
primary: literal
    | strings
    | variable
    | backref
    | tBEGIN bodystmt tEND { $$ = alloc_node(token_begin, $2, NULL); }
    | tLPAREN_ARG expr { lex_state = EXPR_ENDARG; } rparen { $$ = $2; }
    | tLPAREN compstmt ')' { $$ = $2; }
    | primary tCOLON2 const
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, NULL);
    }
    | tCOLON3 const { $$ = $2; }
    | ARRAY { $$ = alloc_node(token_array, NULL, NULL); }
    | tLBRACKET aref_args ']' { $$ = alloc_node(token_array, $2, NULL); }
    | tLBRACE assoc_list '}'
    {
        $$ = alloc_node(token_hash, $2, NULL);
        discard_pos();
    }
    | tRETURN { $$ = alloc_node(token_return, NULL, NULL); }
    | tYIELD '(' call_args rparen { $$ = alloc_node(token_yield, $3, NULL); }
    | tYIELD '(' rparen { $$ = alloc_node(token_yield, NULL, NULL); }
    | tYIELD { $$ = alloc_node(token_yield, NULL, NULL); }
    | tDEFINED opt_eol '(' expr rparen
    {
        $$ = alloc_node(token_defined, $4, NULL);
    }
    | tKWNOT '(' expr rparen { $$ = alloc_node(token_kw_not, $3, NULL); }
    | tKWNOT '(' rparen { $$ = alloc_node(token_kw_not, NULL, NULL); }
    | operation brace_block { $$ = alloc_cond(token_method_call, $2, $1, NULL); }
    | method_call opt_brace_block
    {
        $$ = $1;
        $$->cond = $2;
    }
    | tLAMBDA lambda { $$ = alloc_cond(token_method_call, $2, NULL, NULL); }
    | tIF expr then compstmt if_tail tEND
    {
        $$ = alloc_cond(token_if, $2, $4, $5);
        discard_pos(); /* tEND */
    }
    | tUNLESS expr then compstmt opt_else tEND
    {
        $$ = alloc_cond(token_unless, $2, $4, $5);
        discard_pos(); /* tEND */
    }
    | tWHILE { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
    {
        $$ = alloc_cond(token_while, $3, $6, NULL);
        discard_pos(); /* tEND */
    }
    | tUNTIL { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
    {
        /*
         * Tagged as token_until, matching the modifier form in `stmt'
         * (this action used to build a token_while node).
         */
        $$ = alloc_cond(token_until, $3, $6, NULL);
        discard_pos(); /* tEND */
    }
    | tCASE expr opt_terms case_body tEND
    {
        $$ = alloc_cond(token_case, $2, $4, NULL);
        discard_pos(); /* tEND */
    }
    | tCASE opt_terms case_body tEND
    {
        $$ = alloc_node(token_case, $3, NULL);
        discard_pos(); /* tEND */
    }
    | tFOR for_var tIN { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
    {
        $$ = alloc_cond(token_for, $5, $8, $2);
        discard_pos(); /* tEND */
    }
    | tCLASS cpath superclass
    {
        if (parser->in_def)
            yyerror(parser, "class definition in method body");
    }
    bodystmt tEND
    {
        $$ = alloc_cond(token_class, $3, $5, $2);
        pop_comment(parser, $$);
    }
    | tCLASS opt_terms tLSHIFT expr term bodystmt tEND
    {
        $$ = alloc_node(token_singleton_class, $6, $4);
        pop_comment(parser, $$);
    }
    | tMODULE cpath
    {
        if (parser->in_def)
            yyerror(parser, "module definition in method body");
    }
    bodystmt tEND
    {
        $$ = alloc_node(token_module, $4, $2);
        pop_comment(parser, $$);
    }
    | tDEF fname
    {
        parser->in_def++;
    }
    f_arglist bodystmt tEND
    {
        parser->in_def--;
        $$ = alloc_cond(token_function, $2, $5, $4);
        pop_comment(parser, $$);
    }
    | tDEF singleton dot_or_colon { lex_state = EXPR_FNAME; } fname
    {
        lex_state = EXPR_ENDFN;
        parser->in_def++;
    }
    f_arglist bodystmt tEND
    {
        /* The receiver and the name are paired in an object node first. */
        $$ = alloc_node(token_object, $2, $5);
        $$ = alloc_cond(token_function, $$, $8, $7);
        $$->flags = 1; /* Class method */
        pop_comment(parser, $$);
        parser->in_def--;
    }
    | tBREAK { $$ = alloc_node(token_break, NULL, NULL); }
    | tNEXT { $$ = alloc_node(token_next, NULL, NULL); }
    | tREDO { $$ = alloc_node(token_redo, NULL, NULL); }
    | tRETRY { $$ = alloc_node(token_retry, NULL, NULL); }
;

then: term
    | tTHEN
    | term tTHEN
;

do: term | tDO_COND
;

if_tail: opt_else
    | tELSIF expr then compstmt if_tail
    {
        $$ = alloc_cond(token_if, $2, $4, $5);
    }
;

/* An else branch is stored as a token_if node without a condition. */
opt_else: none
    | tELSE compstmt { $$ = alloc_node(token_if, $2, NULL); }
;

for_var: lhs | mlhs
;

f_marg: f_norm_arg { $$ = $1; }
    | tLPAREN f_margs rparen { $$ = $2; }
;

f_marg_list: f_marg
    | f_marg_list ',' f_marg { $$ = update_list($1, $3); }
;

/* Destructuring parameters; a bare splat becomes an empty object node. */
f_margs: f_marg_list { $$ = $1; }
    | f_marg_list ',' tSTAR f_norm_arg { $$ = update_list($1, $4); }
    | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list
    {
        $$ = concat_list($1, update_list($4, $6));
    }
    | f_marg_list ',' tSTAR
    {
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = update_list($1, n);
    }
    | f_marg_list ',' tSTAR ',' f_marg_list
    {
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = concat_list($1, update_list(n, $5));
    }
    | tSTAR f_norm_arg { $$ = $2; }
    | tSTAR f_norm_arg ',' f_marg_list { $$ = update_list($2, $4); }
    | tSTAR { $$ = alloc_node(token_object, NULL, NULL); }
    | tSTAR ',' f_marg_list
    {
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = update_list(n, $3);
    }
;

block_args_tail: f_block_kwarg ',' f_kwrest opt_f_block_arg
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_block_kwarg opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_kwrest opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_block_arg { $$ = $1; }
;

opt_block_args_tail: ',' block_args_tail { $$ = $2; }
    | /* none */ { $$ = 0; }
;

/* Every accepted combination of block parameters, merged into one list. */
block_param: f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, create_list($5, update_list($7, $8))));
    }
    | f_arg ',' f_block_optarg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_arg ',' f_block_optarg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg ',' f_rest_arg opt_block_args_tail
    {
        $$ = update_list($1, update_list($3, $4));
    }
    | f_arg ',' { $$ = $1; }
    | f_arg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, create_list($3, update_list($5, $6)));
    }
    | f_block_optarg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_block_optarg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_rest_arg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = create_list($1, update_list($3, $4));
    }
    | block_args_tail
;

opt_block_param: none
    | block_param_def
    {
        command_start = 1;
        $$ = $1;
    }
;

block_param_def : '|' opt_bv_decl '|' { $$ = $2; }
    | tOR { $$ = NULL; }
    | '|' block_param opt_bv_decl '|' { $$ = update_list($2, $3); }
;

/* Block-local variables; a warning is raised for versions below ruby19. */
opt_bv_decl: none
    | ';' bv_decls
    {
        if (parser->version < ruby19) {
            yywarning("Block local variables are only available in Ruby 1.9.x or higher.");
        }
        $$ = $2;
    }
;

bv_decls: bvar
    | bv_decls ',' bvar { $$ = update_list($1, $3); }
;

bvar: base
    | f_bad_arg
    {
        $$ = NULL;
        free_ast($1);
    }
;

/*
 * Lambda literal. lpar_beg is saved in the mid-rule action, replaced by the
 * incremented paren_nest, and restored once the body has been parsed.
 */
lambda:
    {
        $<num>$ = parser->lpar_beg;
        parser->lpar_beg = ++parser->paren_nest;
    }
    f_larglist lambda_body
    {
        parser->lpar_beg = $<num>1;
        $$ = alloc_node(token_block, $3, $2);
    }
;

f_larglist: '(' f_args opt_bv_decl rparen { $$ = update_list($2, $3); }
    | f_args
;

lambda_body: tLAMBEG compstmt '}'
    {
        $$ = $2;
        discard_pos(); /* } */
        discard_pos(); /* { */
    }
    | tDO_LAMBDA compstmt tEND
    {
        $$ = $2;
        discard_pos(); /* end */
    }
;

do_block: tDO_BLOCK opt_block_param compstmt tEND
    {
        /* ALLOC_N expands to the allocation plus a pop_pos() statement. */
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
;

block_call: command do_block { $1->cond = $2; $$ = $1; }
    | block_call '.' operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = update_list(aux, $4);
    }
    | block_call tCOLON2 operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = update_list(aux, $4);
    }
;

method_call: operation paren_args
    {
        $$ = alloc_node(token_method_call, $1, $2);
    }
    | primary '.' operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary tCOLON2 operation2 paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary tCOLON2 operation3
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, NULL);
    }
    | primary '.'
paren_args 1155 { 1156 $$ = alloc_node(token_method_call, $1, $3); 1157 } 1158 | primary tCOLON2 paren_args 1159 { 1160 $$ = alloc_node(token_method_call, $1, $3); 1161 } 1162 | super paren_args { $$ = $1; $$->r = $2; } 1163 | super 1164 | primary '[' opt_call_args rbracket 1165 { 1166 $$ = alloc_node(token_array_value, $1, $3); 1167 } 1168 ; 1169 1170 opt_brace_block: none 1171 | brace_block 1172 ; 1173 1174 brace_block: '{' opt_block_param compstmt '}' 1175 { 1176 $$ = ALLOC_N(token_block, $3, $2); 1177 pop_start(parser, $$); 1178 } 1179 | tDO opt_block_param compstmt tEND 1180 { 1181 $$ = ALLOC_N(token_block, $3, $2); 1182 pop_start(parser, $$); 1183 } 1184 ; 1185 1186 case_body: tWHEN args then compstmt cases 1187 { 1188 $$ = alloc_cond(token_when, $2, $4, $5); 1189 } 1190 ; 1191 1192 cases: opt_else | case_body 1193 ; 1194 1195 opt_rescue: tRESCUE rescue_arg then compstmt opt_rescue 1196 { 1197 $$ = alloc_node(token_rescue, $2, $4); 1198 $$->ensure = $5; 1199 } 1200 | none 1201 ; 1202 1203 rescue_arg: exc_list exc_var 1204 { 1205 $$ = ($1 || $2) ? 
alloc_node(token_rescue_arg, $1, $2) : NULL; 1206 } 1207 ; 1208 1209 exc_list: arg | mrhs | none 1210 ; 1211 1212 exc_var: none | tASSOC lhs { $$ = $2; } 1213 ; 1214 1215 opt_ensure: none 1216 | tENSURE compstmt { $$ = alloc_node(token_ensure, $2, NULL); } 1217 ; 1218 1219 literal: numeric | symbol 1220 ; 1221 1222 strings: string { $$ = $1; } 1223 | strings string { $$ = update_list($1, $2); } 1224 ; 1225 1226 string: tCHAR 1227 { 1228 $$ = alloc_node(token_string, NULL, NULL); 1229 } 1230 | tSTRING_BEG string_contents tSTRING_END 1231 { 1232 $$ = alloc_node(lex_strterm->token, $2, NULL); 1233 if (lex_strterm->word) { 1234 free(lex_strterm->word); 1235 lex_strterm->word = NULL; 1236 } 1237 free(lex_strterm); 1238 lex_strterm = NULL; 1239 } 1240 ; 1241 1242 string_contents: /* none */ { $$ = 0; } 1243 | string_contents string_content 1244 { 1245 if ($1 != NULL) 1246 $$ = update_list($1, $2); 1247 else 1248 $$ = $2; 1249 } 1250 ; 1251 1252 string_content: tSTRING_CONTENT { $$ = 0; } 1253 | tSTRING_DBEG 1254 { 1255 lex_state = EXPR_BEG; 1256 $<num>$ = parser->cond_stack; 1257 } 1258 { 1259 $<term>$ = lex_strterm; 1260 lex_strterm = NULL; 1261 } 1262 compstmt '}' 1263 { 1264 parser->cond_stack = $<num>2; 1265 lex_strterm = $<term>3; 1266 $$ = $4; 1267 discard_pos(); /* } */ 1268 } 1269 | tSTRING_DVAR 1270 { 1271 $<term>$ = lex_strterm; 1272 lex_strterm = NULL; 1273 lex_state = EXPR_BEG; 1274 } 1275 string_dvar 1276 { 1277 lex_strterm = $<term>2; 1278 $$ = $3; 1279 } 1280 ; 1281 1282 string_dvar: backref 1283 | GLOBAL { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = global; POP_STACK; } 1284 | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; } 1285 | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; } 1286 ; 1287 1288 symbol: tSYMBEG sym 1289 { 1290 $$ = $2; 1291 $$->kind = token_symbol; 1292 $$->pos.start_col--; 1293 } 1294 ; 1295 1296 sym: fname 1297 | strings 1298 | GLOBAL { $$ = ALLOC_N(token_object, NULL, 
NULL); $$->flags = global; POP_STACK; } 1299 | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; } 1300 | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; } 1301 ; 1302 1303 numeric: simple_numeric 1304 | tUMINUS_NUM simple_numeric %prec tLOWEST 1305 { 1306 $$ = alloc_node(token_unary_minus, $2, NULL); 1307 } 1308 ; 1309 1310 simple_numeric: tINTEGER { $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = int_l; } 1311 | tFLOAT { $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = float_l; } 1312 | tRATIONAL 1313 { 1314 if (parser->version < ruby21) { 1315 yywarning("Rational literals are only available in Ruby 2.1.x or higher."); 1316 } 1317 $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = rational_l; 1318 } 1319 | tIMAGINARY 1320 { 1321 if (parser->version < ruby21) { 1322 yywarning("Imaginary literals are only available in Ruby 2.1.x or higher."); 1323 } 1324 $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = imaginary_l; 1325 } 1326 ; 1327 1328 variable: base 1329 | GLOBAL { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = global; POP_STACK; } 1330 | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; } 1331 | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; } 1332 | const 1333 | other_vars 1334 ; 1335 1336 other_vars: tNIL { $$ = alloc_node(token_nil, NULL, NULL); } 1337 | tSELF { $$ = alloc_node(token_self, NULL, NULL); } 1338 | tTRUE { $$ = alloc_node(token_true, NULL, NULL); } 1339 | tFALSE { $$ = alloc_node(token_false, NULL, NULL); } 1340 | tFILE { $$ = alloc_node(token_file, NULL, NULL); } 1341 | tLINE { $$ = alloc_node(token_line, NULL, NULL); } 1342 | tENCODING { $$ = alloc_node(token_encoding, NULL, NULL); } 1343 ; 1344 1345 backref: tNTH_REF { $$ = ALLOC_N(token_object, NULL, NULL); POP_STACK; } 1346 ; 1347 1348 superclass: term { $$ = NULL; } 1349 | '<' 1350 { 1351 lex_state = EXPR_BEG; 1352 command_start = 1; 1353 } 1354 expr 
term 1355 { 1356 $$ = $3; 1357 } 1358 | error term { yyerrok; $$ = NULL; } 1359 ; 1360 1361 f_arglist: '(' f_args rparen 1362 { 1363 $$ = $2; 1364 lex_state = EXPR_BEG; 1365 command_start = 1; 1366 } 1367 | f_args term 1368 { 1369 $$ = $1; 1370 lex_state = EXPR_BEG; 1371 command_start = 1; 1372 } 1373 ; 1374 1375 args_tail: f_kwarg ',' f_kwrest opt_f_block_arg 1376 { 1377 if (parser->version < ruby20) { 1378 yywarning("Keyword arguments are only available in Ruby 2.0.x or higher."); 1379 } 1380 $$ = concat_list($1, update_list($3, $4)); 1381 } 1382 | f_kwarg opt_f_block_arg 1383 { 1384 if (parser->version < ruby20) { 1385 yywarning("Keyword arguments are only available in Ruby 2.0.x or higher."); 1386 } 1387 $$ = update_list($1, $2); 1388 } 1389 | f_kwrest opt_f_block_arg 1390 { 1391 if (parser->version < ruby20) { 1392 yywarning("Keyword arguments are only available in Ruby 2.0.x or higher."); 1393 } 1394 $$ = update_list($1, $2); 1395 } 1396 | f_block_arg 1397 { 1398 $$ = $1; 1399 } 1400 ; 1401 1402 opt_args_tail: ',' args_tail { $$ = $2; } 1403 | /* none */ { $$ = 0; } 1404 ; 1405 1406 f_args: f_arg ',' f_optarg ',' f_rest_arg opt_args_tail 1407 { 1408 $$ = concat_list($1, concat_list($3, concat_list($5, $6))); 1409 } 1410 | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_args_tail 1411 { 1412 $$ = concat_list($1, concat_list($3, create_list($5, concat_list($7, $8)))); 1413 } 1414 | f_arg ',' f_optarg opt_args_tail 1415 { 1416 $$ = concat_list($1, concat_list($3, $4)); 1417 } 1418 | f_arg ',' f_optarg ',' f_arg opt_args_tail 1419 { 1420 $$ = concat_list($1, concat_list($3, concat_list($5, $6))); 1421 } 1422 | f_arg ',' f_rest_arg opt_args_tail 1423 { 1424 $$ = concat_list($1, concat_list($3, $4)); 1425 } 1426 | f_arg ',' f_rest_arg ',' f_arg opt_args_tail 1427 { 1428 $$ = concat_list($1, concat_list($3, concat_list($5, $6))); 1429 } 1430 | f_arg opt_args_tail 1431 { 1432 $$ = concat_list($1, $2); 1433 } 1434 | f_optarg ',' f_rest_arg opt_args_tail 1435 { 1436 $$ 
= concat_list($1, concat_list($3, $4)); 1437 } 1438 | f_optarg ',' f_rest_arg ',' f_arg opt_args_tail 1439 { 1440 $$ = concat_list($1, create_list($3, concat_list($5, $6))); 1441 } 1442 | f_optarg opt_args_tail 1443 { 1444 $$ = concat_list($1, $2); 1445 } 1446 | f_optarg ',' f_arg opt_args_tail 1447 { 1448 $$ = concat_list($1, concat_list($3, $4)); 1449 } 1450 | f_rest_arg opt_args_tail 1451 { 1452 $$ = concat_list($1, $2); 1453 } 1454 | f_rest_arg ',' f_arg opt_args_tail 1455 { 1456 $$ = create_list($1, concat_list($3, $4)); 1457 } 1458 | args_tail 1459 | none 1460 ; 1461 1462 f_bad_arg: CONST { yyerror(parser, "formal argument cannot be a constant"); $$ = 0; } 1463 | IVAR { yyerror(parser, "formal argument cannot be an instance variable"); $$ = 0; } 1464 | GLOBAL { yyerror(parser, "formal argument cannot be a global variable"); $$ = 0; } 1465 | CVAR { yyerror(parser, "formal argument cannot be a class variable"); $$ = 0; } 1466 ; 1467 1468 f_norm_arg: f_bad_arg | base 1469 ; 1470 1471 f_arg_item: f_norm_arg 1472 | tLPAREN f_margs rparen { $$ = $2; } 1473 ; 1474 1475 f_arg: f_arg_item 1476 | f_arg ',' f_arg_item { $$ = concat_list($1, $3); } 1477 ; 1478 1479 f_kw: label arg 1480 { 1481 $$ = alloc_node(token_object, $1, $2); 1482 $$->flags = label; 1483 } 1484 ; 1485 1486 f_block_kw: label primary 1487 { 1488 $$ = alloc_node(token_object, $1, $2); 1489 $$->flags = label; 1490 } 1491 ; 1492 1493 f_block_kwarg: f_block_kw { $$ = $1; } 1494 | f_block_kwarg ',' f_block_kw { $$ = update_list($1, $3); } 1495 ; 1496 1497 f_kwarg: f_kw { $$ = $1; } 1498 | f_kwarg ',' f_kw { $$ = update_list($1, $3); } 1499 ; 1500 1501 kwrest_mark: tPOW | tDSTAR 1502 ; 1503 1504 f_kwrest: kwrest_mark base 1505 { 1506 $$ = $2; 1507 $$->flags = kwrest; 1508 } 1509 | kwrest_mark 1510 { 1511 $$ = alloc_node(token_object, NULL, NULL); 1512 $$->flags = kwrest; 1513 } 1514 ; 1515 1516 f_opt: base '=' 1517 { 1518 $<num>$ = parser->column; 1519 } 1520 arg 1521 { 1522 $$ = alloc_node(token_assign, 
$1, $4); 1523 $1->flags = opt; /* TODO: not sure about this */ 1524 $4->pos.start_col = $<num>3; 1525 $4->pos.end_col = parser->column; 1526 $4->pos.offset = parser->lex_prev - parser->blob; 1527 } 1528 ; 1529 1530 f_block_opt: base '=' primary { $$ = alloc_node(token_assign, $1, $3); } 1531 ; 1532 1533 f_block_optarg: f_block_opt 1534 | f_block_optarg ',' f_block_opt { $$ = update_list($1, $3); } 1535 ; 1536 1537 f_optarg: f_opt 1538 | f_optarg ',' f_opt { $$ = update_list($1, $3); } 1539 ; 1540 1541 restarg_mark: '*' | tSTAR 1542 ; 1543 1544 f_rest_arg: restarg_mark base { $$ = $2; $$->flags = kwrest; } 1545 | restarg_mark { $$ = alloc_node(token_object, NULL, NULL); $$->flags = kwrest; } 1546 ; 1547 1548 blkarg_mark: '&' | tAMPER 1549 ; 1550 1551 f_block_arg: blkarg_mark base { $$ = $2; $$->flags = block; } 1552 ; 1553 1554 opt_f_block_arg : ',' f_block_arg { $$ = $2; } 1555 | none 1556 ; 1557 1558 singleton: variable { $$ = $1; } 1559 | '(' { lex_state = EXPR_BEG; } expr rparen 1560 { 1561 if ($3 == 0) 1562 yyerror(parser, "can't define singleton method for ()."); 1563 else { 1564 switch ($3->kind) { 1565 case token_string: 1566 case token_regexp: 1567 case token_numeric: 1568 case token_symbol: 1569 case token_array: 1570 yyerror(parser, "can't define singleton method for literals"); 1571 } 1572 } 1573 $$ = $3; 1574 } 1575 ; 1576 1577 const: CONST { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = constant; POP_STACK; } 1578 ; 1579 1580 base: BASE { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = var; POP_STACK; } 1581 ; 1582 1583 assoc_list: none 1584 | assocs trailer { $$ = $1; } 1585 ; 1586 1587 assocs: assoc 1588 | assocs ',' assoc { $$ = update_list($1, $3); } 1589 ; 1590 1591 assoc: arg tASSOC arg 1592 { 1593 $$ = alloc_node(token_object, $1, $3); 1594 } 1595 | label arg 1596 { 1597 if (parser->version < ruby19) { 1598 yywarning("This syntax is only available in Ruby 1.9.x or higher."); 1599 } 1600 $$ = alloc_node(token_object, $1, $2); 1601 } 
1602 | tDSTAR arg 1603 { 1604 if (parser->version < ruby20) { 1605 yywarning("tDSTAR token is only available in Ruby 2.0.x or higher."); 1606 } 1607 $$ = $2; 1608 } 1609 ; 1610 1611 operation: base | const 1612 ; 1613 1614 operation2: base 1615 | const 1616 | op 1617 { 1618 $$ = alloc_node(token_object, NULL, NULL); 1619 $$->name = parser->aux; 1620 } 1621 ; 1622 1623 operation3: base 1624 | op 1625 { 1626 $$ = alloc_node(token_object, NULL, NULL); 1627 $$->name = parser->aux; 1628 } 1629 ; 1630 1631 label: tKEY { $$ = ALLOC_N(token_symbol, NULL, NULL); POP_STACK; } 1632 ; 1633 1634 super: tSUPER { $$ = alloc_node(token_super, NULL, NULL); } 1635 ; 1636 1637 dot_or_colon: '.' | tCOLON2 1638 ; 1639 1640 opt_terms: /* none */ | terms 1641 ; 1642 1643 opt_eol: /* none */ | '\n' 1644 ; 1645 1646 rparen: opt_eol ')' 1647 ; 1648 1649 rbracket: opt_eol ']' 1650 ; 1651 1652 trailer: opt_eol | ',' 1653 ; 1654 1655 term: ';' {yyerrok;} | '\n' 1656 ; 1657 1658 terms: term | terms ';' {yyerrok;} 1659 ; 1660 1661 none: /* none */ { $$ = NULL; } 1662 ; 1663 1664 %% 1665 #undef parser 1666 #undef yylex 1667 1668 #include <ctype.h> 1669 #include "hash.c" 1670 1671 1672 /* Let's define some useful macros :D */ 1673 1674 #define _unused_(c) (void) c; 1675 #define multiline_comment(c) (*(c+1) == 'b' && *(c+2) == 'e' && *(c+3) == 'g' && *(c+4) == 'i' && *(c+5) == 'n') 1676 #define multiline_end(c) (*c == '=' && *(c+1) == 'e' && *(c+2) == 'n' && *(c+3) == 'd') 1677 #define not_sep(c) (is_valid_identifier(c) || is_utf8_digit(c) || *c == '_') 1678 #define is_blank(c) (c == ' ' || c == '\t') 1679 #define SWAP(a, b, aux) { aux = a; a = b; b = aux; } 1680 #define is_special_method(buffer) ((strlen(buffer) > 4) && buffer[0] == '_' && \ 1681 buffer[1] == '_' && buffer[strlen(buffer) - 2] == '_' && \ 1682 buffer[strlen(buffer) - 1] == '_') 1683 #define IS_EOF() ((unsigned int) (parser->lex_p - parser->blob) >= parser->length) 1684 #define IS_ARG() IS_lex_state(EXPR_ARG_ANY) 1685 #define 
IS_END() IS_lex_state(EXPR_END_ANY) 1686 #define IS_BEG() IS_lex_state(EXPR_BEG_ANY) 1687 #define IS_SPCARG(c) (IS_ARG() && space_seen && !isspace(c)) 1688 #define IS_LABEL_POSSIBLE() ((IS_lex_state(EXPR_BEG | EXPR_ENDFN) && !cmd_state) || IS_ARG()) 1689 #define IS_LABEL_SUFFIX() (*parser->lex_p == ':' && *(parser->lex_p + 1) != ':') 1690 #define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT) 1691 1692 1693 /* Initialize the parser */ 1694 static void init_parser(struct parser_t *parser) 1695 { 1696 parser->content_given = 0; 1697 parser->ast = NULL; 1698 parser->blob = NULL; 1699 parser->lex_p = NULL; 1700 parser->lex_prev = NULL; 1701 parser->lex_prevc = 0; 1702 parser->lex_pend = NULL; 1703 parser->line_pend = 0; 1704 parser->column_pend = 0; 1705 parser->here_found = 0; 1706 parser->eof_reached = 0; 1707 parser->cond_stack = 0; 1708 parser->cmdarg_stack = 0; 1709 parser->in_def = 0; 1710 parser->lpar_beg = 0; 1711 parser->paren_nest = 0; 1712 parser->sp = 0; 1713 parser->line = 1; 1714 parser->column = 0; 1715 parser->pos_stack = (struct pos_t *) malloc(SSIZE * sizeof(struct pos_t)); 1716 parser->stack_scale = 0; 1717 parser->pos_size = 0; 1718 parser->errors = NULL; 1719 parser->last_error = NULL; 1720 parser->warning = 0; 1721 parser->unrecoverable = 0; 1722 parser->last_comment.comment = NULL; 1723 parser->last_comment.line = 0; 1724 parser->comment_index = 0; 1725 command_start = 1; 1726 lex_strterm = NULL; 1727 lex_state = EXPR_BEG; 1728 } 1729 1730 /* Free the parser */ 1731 static void free_parser(struct parser_t *parser) 1732 { 1733 int index; 1734 1735 for (index = 0; index < parser->sp; index++) 1736 free(parser->stack[index]); 1737 if (parser->pos_stack != NULL) 1738 free(parser->pos_stack); 1739 if (lex_strterm && lex_strterm->word) 1740 free(lex_strterm->word); 1741 if (parser->last_comment.comment) 1742 free(parser->last_comment.comment); 1743 if (!parser->content_given) 1744 free(parser->blob); 1745 } 1746 1747 /* Read the file's source 
code and allocate it for further inspection. */ 1748 static int retrieve_source(struct parser_t *p, const char *path) 1749 { 1750 int length; 1751 1752 /* Open specified file */ 1753 FILE *fd = fopen(path, "r"); 1754 if (!fd) { 1755 fprintf(stderr, "Cannot open file: %s\n", path); 1756 return 0; 1757 } 1758 1759 fseek(fd, 0, SEEK_END); 1760 length = ftell(fd); 1761 fseek(fd, 0, SEEK_SET); 1762 1763 if (!length) 1764 return 0; 1765 p->blob = (char *) malloc(sizeof(char) * length); 1766 1767 if (!p->blob) { 1768 fprintf(stderr, "Cannot store contents\n"); 1769 return 0; 1770 } 1771 fread(p->blob, length, 1, fd); 1772 if (ferror(fd)) { 1773 fprintf(stderr, "Reading error\n"); 1774 return 0; 1775 } 1776 p->length = length; 1777 p->lex_p = p->blob; 1778 fclose(fd); 1779 return 1; 1780 } 1781 1782 /* 1783 * Some macros to make easier the UTF-8 support 1784 */ 1785 #define is_utf(c) ((c & 0xC0) != 0x80) 1786 #define is_special(c) (utf8_charsize(c) > 1) 1787 #define is_identchar(c) (is_utf8_alnum(c) || *c == '_') 1788 1789 /* 1790 * This function is really simple. It steps over a char of 1791 * the string s, that is encoded in UTF-8. The result varies on the 1792 * number of bytes that encodes a single character following the UTF-8 1793 * rules. Therefore, this function will return 1 if the character 1794 * is in plain-ASCII, and greater than 1 otherwise. 1795 */ 1796 static int utf8_charsize(const char *s) 1797 { 1798 int size = 0; 1799 int i = 0; 1800 1801 do { 1802 i++; 1803 size++; 1804 } while (s[i] && !is_utf(s[i])); 1805 return size; 1806 } 1807 1808 static int is_utf8_alpha(const char *str) 1809 { 1810 return is_special(str) ? 1 : isalpha(*str); 1811 } 1812 1813 static int is_utf8_alnum(const char *str) 1814 { 1815 return is_special(str) ? 1 : isalnum(*str); 1816 } 1817 1818 static int is_utf8_graph(const char *str) 1819 { 1820 return is_special(str) ? 1 : isgraph(*str); 1821 } 1822 1823 static int is_utf8_digit(const char *str) 1824 { 1825 return is_special(str) ? 
0 : isdigit(*str); 1826 } 1827 1828 /* Check that the given parameter points to a valid identifier */ 1829 static int is_valid_identifier(const char *c) 1830 { 1831 if (is_utf8_alpha(c)) 1832 return 1; 1833 else if (*c == '$' && is_utf8_graph(c + 1) && !is_utf8_digit(c + 1)) 1834 return 1; 1835 else if ((*c == '_' || *c == '@') && is_utf8_alpha(c + 1)) 1836 return 1; 1837 else if (*c == '@' && *(c + 1) == '@' && (is_utf8_alpha(c + 2) || *(c + 2) == '_')) 1838 return 1; 1839 return 0; 1840 } 1841 1842 /* Get the next character and move the lexer forward. */ 1843 static int parser_nextc(struct parser_t *parser) 1844 { 1845 int c; 1846 1847 if (parser->eof_reached || IS_EOF()) 1848 return -1; 1849 1850 parser->lex_prev = parser->lex_p; 1851 parser->lex_prevc = parser->column; 1852 c = (unsigned char) *parser->lex_p++; 1853 if (c == '\n') { 1854 if (parser->here_found) { 1855 parser->line = parser->line_pend; 1856 parser->column = parser->column_pend; 1857 parser->lex_p = parser->lex_pend + 1; 1858 parser->here_found = 0; 1859 } 1860 parser->line++; 1861 parser->column = -1; 1862 } 1863 parser->column++; 1864 return c; 1865 } 1866 #define nextc() parser_nextc(parser) 1867 1868 /* Move the lexer backwards. 
*/ 1869 static void parser_pushback(struct parser_t *parser) 1870 { 1871 parser->column--; 1872 parser->lex_p--; 1873 if (*parser->lex_p == '\n') { 1874 parser->line--; 1875 parser->column = parser->lex_prevc; 1876 } 1877 } 1878 #define pushback() parser_pushback(parser) 1879 1880 /* It parses a heredoc identifier and sets a new lex_strterm */ 1881 static int parse_heredoc_identifier(struct parser_t *parser) 1882 { 1883 char *buffer, *ptr; 1884 int count = SSIZE, scale = 0; 1885 char c = nextc(); 1886 unsigned char quote_seen = 0, term = ' '; 1887 unsigned char dash_seen = 0; 1888 1889 /* Check for <<- case */ 1890 if (c == '-') { 1891 dash_seen = 1; 1892 c = nextc(); 1893 } 1894 /* And now surrounding quotes */ 1895 if (c == '\'' || c == '"' || c == '`') { 1896 term = c; 1897 c = nextc(); 1898 quote_seen = 1; 1899 } 1900 if (!quote_seen && !is_identchar(parser->lex_prev)) { 1901 if (dash_seen) 1902 pushback(); 1903 return 0; 1904 } 1905 1906 buffer = (char *) malloc(SSIZE * sizeof(char)); 1907 ptr = buffer; 1908 for (;;) { 1909 /* If quote was seen, anything except the term is accepted */ 1910 if (quote_seen) { 1911 if (c == term || !is_utf8_graph(parser->lex_prev)) 1912 break; 1913 } else if (!is_identchar(parser->lex_prev)) 1914 break; 1915 if (!count) { 1916 scale++; 1917 buffer = (char *) realloc(buffer, (SSIZE << scale) * sizeof(char)); 1918 } 1919 *ptr++ = c; 1920 c = nextc(); 1921 if (c < 0) { 1922 free(buffer); 1923 yyerror(parser, "unterminated here document identifier"); 1924 return 0; 1925 } 1926 } 1927 *ptr = '\0'; 1928 pushback(); 1929 1930 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t)); 1931 lex_strterm->term = 1; 1932 lex_strterm->can_embed = dash_seen; 1933 lex_strterm->word = buffer; 1934 lex_strterm->token = token_heredoc; 1935 lex_strterm->nestable = 0; 1936 lex_strterm->paren = 0; 1937 parser->lex_pend = parser->lex_p + quote_seen; 1938 parser->line_pend = parser->line; 1939 parser->column_pend = parser->column; 1940 return 1; 
1941 } 1942 1943 /* Let's parse a heredoc */ 1944 static int parse_heredoc(struct parser_t *parser) 1945 { 1946 int length = strlen(lex_strterm->word); 1947 char *aux = (char*)alloca(length); 1948 char c = nextc(); 1949 int i = 0; 1950 int ax = 0; 1951 1952 /* Skip until next line */ 1953 while (c != -1 && c != '\n') 1954 c = nextc(); 1955 1956 do { 1957 c = nextc(); 1958 1959 /* Ignore initial spaces if dash seen */ 1960 if (i == 0 && lex_strterm->can_embed) 1961 while (isspace(c) || c == '\n') 1962 c = nextc(); 1963 if (c == '#' && *(parser->lex_prev - 1) != '\\') { 1964 c = nextc(); 1965 switch (c) { 1966 case '$': case '@': 1967 parser->column -= ax; 1968 pushback(); 1969 return tSTRING_DVAR; 1970 case '{': 1971 parser->column -= ax; 1972 command_start = 1; 1973 return tSTRING_DBEG; 1974 } 1975 } 1976 aux[i] = c; 1977 if (c == '\n') { 1978 if ((length == i) && !strncmp(lex_strterm->word, aux, i)) { 1979 pushback(); 1980 return tSTRING_END; 1981 } 1982 i = -1; 1983 } else 1984 ax += utf8_charsize(parser->lex_prev) - 1; 1985 if (i >= length) 1986 i = -1; 1987 i++; 1988 } while (c != -1); 1989 1990 parser->eof_reached = 1; 1991 if (lex_strterm->word) { 1992 free(lex_strterm->word); 1993 lex_strterm->word = NULL; 1994 } 1995 free(lex_strterm); 1996 lex_strterm = NULL; 1997 return token_invalid; 1998 } 1999 2000 /* Return what's the char that closes c */ 2001 static char closing_char(char c) 2002 { 2003 switch (c) { 2004 case '[': return ']'; 2005 case '(': return ')'; 2006 case '<': return '>'; 2007 case '{': return '}'; 2008 default: return c; 2009 } 2010 } 2011 2012 /* Guess the token kind of the shortcut based on the given character */ 2013 static int guess_kind(struct parser_t *parser, char c) 2014 { 2015 if (!isalpha(c)) 2016 return token_string; 2017 2018 switch (c) { 2019 case 'Q': case 'q': case 'x': return token_string; 2020 case 'I': case 'i': 2021 if (parser->version < ruby20) { 2022 yywarning("This shortcut is only available in Ruby 2.0.x or higher."); 
2023 } 2024 case 'W': case 'w': return token_array; 2025 case 's': return token_symbol; 2026 case 'r': return token_regexp; 2027 default: 2028 yyerror(parser, "unknown type of %string"); 2029 return 0; 2030 } 2031 } 2032 2033 /* Push name to the stack */ 2034 static void push_stack(struct parser_t *parser, const char *buf) 2035 { 2036 parser->stack[parser->sp] = strdup(buf); 2037 parser->sp++; 2038 } 2039 2040 /* Pop name from the stack. */ 2041 static void pop_stack(struct parser_t *parser, struct Node *n) 2042 { 2043 if (n != NULL) 2044 n->name = parser->stack[0]; 2045 parser->stack[0] = parser->stack[1]; 2046 parser->stack[1] = NULL; 2047 parser->sp--; 2048 } 2049 2050 /* Push a position into the stack of positions */ 2051 static void push_pos(struct parser_t *parser, struct pos_t tokp) 2052 { 2053 int scale = SSIZE * parser->stack_scale; 2054 2055 parser->pos_size++; 2056 if (parser->pos_size > SSIZE) { 2057 parser->pos_size = 1; 2058 parser->stack_scale++; 2059 scale += SSIZE; 2060 parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t)); 2061 } 2062 parser->pos_stack[parser->pos_size + scale - 1] = tokp; 2063 } 2064 2065 /* Pop a position from the stack of positions and assign to the given node */ 2066 static void pop_pos(struct parser_t *parser, struct Node *n) 2067 { 2068 int scale = SSIZE * parser->stack_scale; 2069 int pos = parser->pos_size - 1 + scale; 2070 struct pos_t tokp = parser->pos_stack[pos]; 2071 2072 if (n != NULL) { 2073 n->pos.start_line = tokp.start_line; 2074 n->pos.start_col = tokp.start_col; 2075 n->pos.end_line = tokp.end_line; 2076 n->pos.end_col = tokp.end_col; 2077 n->pos.offset = tokp.offset; 2078 } 2079 parser->pos_size--; 2080 if (parser->pos_size == 0 && parser->stack_scale > 0) { 2081 parser->stack_scale--; 2082 parser->pos_size = SSIZE; 2083 scale -= SSIZE; 2084 parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t)); 2085 } 2086 } 2087 2088 /* Like 
pop_pos but it just copies the start position to the given node */ 2089 static void pop_start(struct parser_t *parser, struct Node *n) 2090 { 2091 n->pos.start_line = parser->pos_stack[parser->pos_size - 1].start_line; 2092 n->pos.start_col = parser->pos_stack[parser->pos_size - 1].start_col; 2093 pop_pos(parser, NULL); 2094 } 2095 2096 /* Like pop_pos but it just copies the end position to the given node */ 2097 static void pop_end(struct parser_t *parser, struct Node *n) 2098 { 2099 n->pos.end_line = parser->pos_stack[parser->pos_size - 1].start_line; 2100 n->pos.end_col = parser->pos_stack[parser->pos_size - 1].start_col; 2101 pop_pos(parser, NULL); 2102 } 2103 2104 /* Push the last comment that we've found to the stack of comments. */ 2105 static void push_last_comment(struct parser_t *parser) 2106 { 2107 if ((parser->line - parser->last_comment.line) < 2) 2108 parser->comment_stack[parser->comment_index] = parser->last_comment.comment; 2109 else { 2110 parser->comment_stack[parser->comment_index] = NULL; 2111 if (parser->last_comment.comment) 2112 free(parser->last_comment.comment); 2113 } 2114 parser->comment_index++; 2115 parser->last_comment.comment = NULL; 2116 } 2117 2118 /* Pop a comment from the stack of comments and assign it to the given node */ 2119 static void pop_comment(struct parser_t *parser, struct Node *n) 2120 { 2121 if (parser->comment_index > 0) { 2122 parser->comment_index--; 2123 n->comment = parser->comment_stack[parser->comment_index]; 2124 } 2125 } 2126 2127 #define __check_buffer_size(N) { \ 2128 if (count > N) { \ 2129 count = 0; \ 2130 scale++; \ 2131 buffer = (char *) realloc(buffer, scale * 1024); \ 2132 } \ 2133 } 2134 2135 /* Store the given comment as the last comment seen */ 2136 static void store_comment(struct parser_t *parser, char *comment) 2137 { 2138 if (parser->last_comment.comment != NULL) 2139 free(parser->last_comment.comment); 2140 parser->last_comment.comment = comment; 2141 parser->last_comment.line = 
parser->line; 2142 } 2143 2144 /* Check if the given parameter points to an indented comment */ 2145 static int is_indented_comment(struct parser_t *parser) 2146 { 2147 char *c = parser->lex_prev; 2148 char *original = c; 2149 2150 for (; *c == ' ' || *c == '\t'; ++c); 2151 parser->lex_p += (c - original); 2152 parser->column += (c - original); 2153 return (*c == '#'); 2154 } 2155 2156 /* Read a comment and store it if possible */ 2157 static void set_comment(struct parser_t *parser) 2158 { 2159 int c = ' ', count = 0, scale = 0; 2160 char *buffer = (char *) malloc(LSIZE); 2161 2162 for (;; ++count) { 2163 if (c != '#' && !is_indented_comment(parser)) 2164 break; 2165 c = *(parser->lex_p - 1); 2166 while (c == '#' && c != -1) 2167 c = nextc(); 2168 if (c != '\n') { 2169 for (; c != -1; count++) { 2170 __check_buffer_size(1000); 2171 buffer[count] = c; 2172 c = nextc(); 2173 if (c == '\n') { 2174 buffer[++count] = c; 2175 break; 2176 } 2177 } 2178 } else 2179 buffer[count] = c; 2180 c = nextc(); 2181 } 2182 2183 if (c != -1) 2184 pushback(); 2185 buffer[count] = '\0'; 2186 store_comment(parser, buffer); 2187 } 2188 2189 /* Parse a string or a regexp */ 2190 static int parse_string(struct parser_t *parser) 2191 { 2192 register int c = *parser->lex_p; 2193 int next = *(parser->lex_p + 1); 2194 2195 if (c == '\\' && (next == '\\' || next == lex_strterm->term || next == lex_strterm->paren)) { 2196 parser->lex_p += 2; 2197 parser->column += 2; 2198 return tSTRING_CONTENT; 2199 } 2200 2201 if (c == lex_strterm->term) { 2202 nextc(); 2203 if (lex_strterm->nestable) { 2204 lex_strterm->nest--; 2205 if (lex_strterm->nest > 0) 2206 return tSTRING_CONTENT; 2207 } 2208 return tSTRING_END; 2209 } else if (lex_strterm->nestable && lex_strterm->paren == c) { 2210 lex_strterm->nest++; 2211 nextc(); 2212 return tSTRING_CONTENT; 2213 } 2214 2215 if (IS_EOF()) { 2216 parser->eof_reached = 1; 2217 yyerror(parser, "unterminated string meets end of file"); 2218 free(lex_strterm); 2219 
lex_strterm = NULL; 2220 return token_invalid; 2221 } 2222 2223 if (lex_strterm->can_embed && c == '#' && *(parser->lex_prev) != '\\') { 2224 nextc(); 2225 switch (*parser->lex_p) { 2226 case '$': case '@': 2227 return tSTRING_DVAR; 2228 case '{': 2229 c = nextc(); 2230 command_start = 1; 2231 return tSTRING_DBEG; 2232 } 2233 pushback(); 2234 } 2235 2236 /* Re-using the next and the c variables */ 2237 next = utf8_charsize(parser->lex_p); 2238 c = next - 1; 2239 while (next-- > 0) { 2240 if (nextc() < 0) { 2241 parser->eof_reached = 1; 2242 free(lex_strterm); 2243 lex_strterm = NULL; 2244 return token_invalid; 2245 } 2246 } 2247 parser->column -= c; 2248 return tSTRING_CONTENT; 2249 } 2250 2251 /* Regular expressions can end with some options, read them */ 2252 static void parse_re_options(struct parser_t *parser) 2253 { 2254 char aux[64]; 2255 int c = *parser->lex_p; 2256 2257 while (isalpha(c)) { 2258 if (c != 'i' && c != 'm' && c != 'x' && c != 'o' && 2259 c != 'u' && c != 'e' && c != 's' && c != 'n') { 2260 sprintf(aux, "unknown regexp option - %c", c); 2261 yyerror(parser, aux); 2262 return; 2263 } 2264 c = nextc(); 2265 } 2266 pushback(); 2267 } 2268 2269 /* Standard warning for ambiguous arguments */ 2270 static void arg_ambiguous_gen(struct parser_t *parser) 2271 { 2272 yywarning("ambiguous first argument; put parentheses or even spaces"); 2273 } 2274 #define arg_ambiguous() (arg_ambiguous_gen(parser), 1) 2275 2276 /* 2277 * This is the lexer. It reads the source code (blob) and provides tokens to 2278 * the parser. It also updates the necessary flags. 
2279 */ 2280 static int parser_yylex(struct parser_t *parser) 2281 { 2282 register int c; 2283 int bc = 0; 2284 char *cp; 2285 char lexbuf[SSIZE]; 2286 unsigned char space_seen = 0; 2287 int cmd_state; 2288 struct pos_t tokp = {-1, -1, -1, -1, 0}; 2289 2290 /* Check for string terminations: string, regexp, heredoc, shortcut */ 2291 if (lex_strterm) { 2292 if (lex_strterm->token == token_heredoc) { 2293 c = parse_heredoc(parser); 2294 if (c == tSTRING_END) { 2295 tokp.end_line = parser->line; 2296 tokp.end_col = parser->column; 2297 SWAP(parser->line, parser->line_pend, bc); 2298 SWAP(parser->column, parser->column_pend, bc); 2299 SWAP(parser->lex_p, parser->lex_pend, cp); 2300 parser->here_found = 1; 2301 lex_state = EXPR_END; 2302 2303 } 2304 } else { 2305 c = parse_string(parser); 2306 if (c == tSTRING_END) { 2307 if (lex_strterm->token == token_regexp && isalpha(*parser->lex_p)) 2308 parse_re_options(parser); 2309 lex_state = EXPR_END; 2310 } 2311 } 2312 return c; 2313 } 2314 2315 cmd_state = command_start; 2316 command_start = 0; 2317 retry: 2318 c = nextc(); 2319 2320 tokp.start_line = parser->line; 2321 tokp.start_col = parser->column - 1; 2322 2323 /* Check numeric values here instead of entering the main switch */ 2324 if (isdigit(c)) { 2325 cp = lexbuf; 2326 goto tnum; 2327 } 2328 2329 switch (c) { 2330 case '\0': /* NULL */ 2331 case EOF: /* end of script */ 2332 parser->eof_reached = 1; 2333 return token_invalid; 2334 2335 /* white spaces */ 2336 case ' ': case '\t': case '\f': case '\r': 2337 case '\13': /* vertical tab */ 2338 space_seen = 1; 2339 goto retry; 2340 case '#': 2341 set_comment(parser); 2342 case '\n': 2343 if (IS_lex_state(EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)) 2344 goto retry; 2345 CMDARG_PUSH(0); 2346 lex_state = EXPR_BEG; 2347 command_start = 1; 2348 return '\n'; 2349 case '=': 2350 lex_state = IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG; 2351 bc = nextc(); 2352 if (bc == '=') { 2353 if (nextc() == '=') 2354 return tEQQ; 2355 pushback(); 2356 return tEQ; 2357 } 2358 if (bc == '~') 2359 return tMATCH; 2360 if (bc == '>') 2361 return tASSOC; 2362 if (multiline_comment(parser->lex_prev - 1)) { 2363 parser->column += 4; 2364 parser->lex_p += 4; 2365 while (!multiline_end(parser->lex_prev)) 2366 nextc(); 2367 parser->column += 3; 2368 parser->lex_p += 3; 2369 bc = 0; 2370 goto retry; 2371 } 2372 break; 2373 case '[': 2374 parser->paren_nest++; 2375 if (IS_AFTER_OPERATOR()) { 2376 lex_state = EXPR_ARG; 2377 bc = nextc(); 2378 if (bc == ']') { 2379 if (nextc() == '=') 2380 return tASET; 2381 c = tAREF; 2382 } 2383 break; 2384 } else if (IS_BEG()) 2385 c = tLBRACKET; 2386 else if (IS_ARG() && space_seen) 2387 c = tLBRACKET; 2388 lex_state = EXPR_BEG; 2389 COND_PUSH(0); 2390 CMDARG_PUSH(0); 2391 return c; 2392 case ']': 2393 parser->paren_nest--; 2394 lex_state = EXPR_ENDARG; 2395 CMDARG_LEXPOP(); 2396 COND_LEXPOP(); 2397 return c; 2398 case '<': 2399 bc = nextc(); 2400 if (bc == '<' && !IS_lex_state(EXPR_DOT | EXPR_CLASS) && 2401 !IS_END() && (!IS_ARG() || space_seen)) { 2402 if (parse_heredoc_identifier(parser)) 2403 return tSTRING_BEG; 2404 pushback(); 2405 } 2406 if (IS_AFTER_OPERATOR()) 2407 lex_state = EXPR_ARG; 2408 else { 2409 if (IS_lex_state(EXPR_CLASS)) 2410 command_start = 1; 2411 lex_state = EXPR_BEG; 2412 } 2413 if (bc == '=') { 2414 if (nextc() == '>') 2415 return tCMP; 2416 pushback(); 2417 return tLEQ; 2418 } 2419 if (bc == '<') { 2420 if (nextc() == '=') { 2421 lex_state = EXPR_BEG; 2422 return tOP_ASGN; 2423 } 2424 c = tLSHIFT; 2425 } 2426 break; 2427 case '>': 2428 lex_state = IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG; 2429 bc = nextc(); 2430 if (bc == '=') 2431 return tGEQ; 2432 if (bc == '>') { 2433 if (nextc() == '=') { 2434 lex_state = EXPR_BEG; 2435 return tOP_ASGN; 2436 } 2437 c = tRSHIFT; 2438 } 2439 break; 2440 case '!': 2441 bc = nextc(); 2442 if (IS_AFTER_OPERATOR()) { 2443 lex_state = EXPR_ARG; 2444 if (bc == '@') 2445 return '!'; 2446 } else 2447 lex_state = EXPR_BEG; 2448 if (bc == '=') 2449 return tNEQ; 2450 if (bc == '~') 2451 return tNMATCH; 2452 break; 2453 case '+': 2454 bc = nextc(); 2455 if (IS_AFTER_OPERATOR()) { 2456 lex_state = EXPR_ARG; 2457 if (bc == '@') 2458 return tUPLUS; 2459 pushback(); 2460 return '+'; 2461 } 2462 if (bc == '=') { 2463 lex_state = EXPR_BEG; 2464 return tOP_ASGN; 2465 } 2466 if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) { 2467 lex_state = EXPR_BEG; 2468 pushback(); 2469 return tUPLUS; 2470 } 2471 lex_state = EXPR_BEG; 2472 break; 2473 case '-': 2474 bc = nextc(); 2475 if (IS_AFTER_OPERATOR()) { 2476 lex_state = EXPR_ARG; 2477 if (bc == '@') 2478 return tUMINUS; 2479 pushback(); 2480 return '-'; 2481 } 2482 if (bc == '=') { 2483 lex_state = EXPR_BEG; 2484 return tOP_ASGN; 2485 } 2486 if (bc == '>') { 2487 lex_state = EXPR_ENDFN; 2488 return tLAMBDA; 2489 } 2490 if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) { 2491 lex_state = EXPR_BEG; 2492 pushback(); 2493 if (bc != -1 && isdigit(bc)) 2494 return tUMINUS_NUM; 2495 return tUMINUS; 2496 } 2497 lex_state = EXPR_BEG; 2498 break; 2499 case '*': 2500 bc = nextc(); 2501 if (bc == '=') { 2502 lex_state = EXPR_BEG; 2503 return tOP_ASGN; 2504 } 2505 if (bc == '*') { 2506 bc = nextc(); 2507 if (bc == '=') { 2508 lex_state = EXPR_BEG; 2509 return tOP_ASGN; 2510 } 2511 pushback(); 2512 if (IS_SPCARG(bc)) { 2513 yywarning("`**' interpreted as argument prefix"); 2514 c = tDSTAR; 2515 } else if (IS_BEG()) 2516 c = tDSTAR; 2517 else 2518 c = tPOW; 2519 lex_state = IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG; 2520 return c; 2521 } 2522 if (IS_SPCARG(bc)) { 2523 yywarning("`*' interpreted as argument prefix"); 2524 c = tSTAR; 2525 } else if (IS_BEG()) 2526 c = tSTAR; 2527 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; 2528 break; 2529 case '/': 2530 if (IS_lex_state(EXPR_BEG_ANY)) { 2531 regexp: 2532 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t)); 2533 lex_strterm->term = c; 2534 lex_strterm->can_embed = 1; 2535 lex_strterm->token = token_regexp; 2536 lex_strterm->word = NULL; 2537 lex_strterm->nestable = 0; 2538 lex_strterm->paren = 0; 2539 return tSTRING_BEG; 2540 } 2541 bc = nextc(); 2542 if (bc == '=') { 2543 lex_state = EXPR_BEG; 2544 return tOP_ASGN; 2545 } 2546 pushback(); 2547 if (IS_SPCARG(bc)) { 2548 arg_ambiguous_gen(parser); 2549 goto regexp; 2550 } 2551 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; 2552 return c; 2553 case '%': 2554 bc = nextc(); 2555 if (IS_lex_state(EXPR_BEG_ANY) || IS_SPCARG(bc)) { 2556 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t)); 2557 lex_strterm->token = guess_kind(parser, bc); 2558 if (isalpha(bc)) 2559 bc = nextc(); 2560 lex_strterm->term = closing_char(bc); 2561 lex_strterm->can_embed = 1; 2562 lex_strterm->word = NULL; 2563 lex_strterm->paren = bc; 2564 lex_strterm->nestable = (bc != lex_strterm->term); 2565 lex_strterm->nest = 1; 2566 return tSTRING_BEG; 2567 } 2568 if (bc == '=') { 2569 lex_state = EXPR_BEG; 2570 return tOP_ASGN; 2571 } 2572 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; 2573 break; 2574 case '&': 2575 bc = nextc(); 2576 if (bc == '&') { 2577 lex_state = EXPR_BEG; 2578 if (nextc() == '=') 2579 return tOP_ASGN; 2580 pushback(); 2581 return tAND; 2582 } 2583 if (bc == '=') { 2584 lex_state = EXPR_BEG; 2585 return tOP_ASGN; 2586 } 2587 if (IS_SPCARG(bc)) { 2588 yywarning("`&' interpreted as argument prefix"); 2589 c = tAMPER; 2590 } else if (IS_BEG()) 2591 c = tAMPER; 2592 lex_state = IS_AFTER_OPERATOR() ? 
EXPR_ARG : EXPR_BEG; 2593 break; 2594 case '|': 2595 bc = nextc(); 2596 if (bc == '|') { 2597 lex_state = EXPR_BEG; 2598 if (nextc() == '=') 2599 return tOP_ASGN; 2600 pushback(); 2601 return tOR; 2602 } 2603 if (bc == '=') { 2604 lex_state = EXPR_BEG; 2605 return tOP_ASGN; 2606 } 2607 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; 2608 break; 2609 case '.': 2610 lex_state = EXPR_BEG; 2611 bc = nextc(); 2612 if (bc == '.') { 2613 if (nextc() == '.') 2614 return tDOT3; 2615 pushback(); 2616 return tDOT2; 2617 } 2618 lex_state = EXPR_DOT; 2619 break; 2620 case ':': 2621 bc = nextc(); 2622 if (bc == ':') { 2623 if (IS_BEG() || IS_lex_state(EXPR_CLASS) || IS_SPCARG(-1)) { 2624 lex_state = EXPR_BEG; 2625 return tCOLON3; 2626 } 2627 lex_state = EXPR_DOT; 2628 return tCOLON2; 2629 } 2630 if (IS_END() || isspace(bc)) { 2631 lex_state = EXPR_BEG; 2632 break; 2633 } 2634 lex_state = EXPR_FNAME; 2635 pushback(); 2636 return tSYMBEG; 2637 case '^': 2638 if (nextc() == '=') { 2639 lex_state = EXPR_BEG; 2640 return tOP_ASGN; 2641 } 2642 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG; 2643 break; 2644 case ';': 2645 command_start = 1; 2646 /* fallthrough */ 2647 case ',': 2648 lex_state = EXPR_BEG; 2649 return c; 2650 case '?': 2651 if (IS_END()) { 2652 lex_state = EXPR_VALUE; 2653 return c; 2654 } 2655 bc = nextc(); 2656 if (isspace(bc)) { 2657 lex_state = EXPR_VALUE; 2658 break; 2659 } 2660 if (bc == '\\') 2661 nextc(); 2662 if (IS_BEG()) { 2663 lex_state = EXPR_END; 2664 return tCHAR; 2665 } 2666 if (is_blank(*parser->lex_p) || *parser->lex_p == ':') { 2667 lex_state = EXPR_VALUE; 2668 break; 2669 } 2670 lex_state = EXPR_END; 2671 return tCHAR; 2672 case '`': 2673 if (IS_lex_state(EXPR_FNAME)) { 2674 lex_state = EXPR_ENDFN; 2675 return c; 2676 } 2677 if (IS_lex_state(EXPR_DOT)) { 2678 lex_state = (cmd_state) ? 
EXPR_CMDARG : EXPR_ARG; 2679 return c; 2680 } 2681 /* fallthrough */ 2682 case '"': 2683 space_seen = 1; 2684 /* fallthrough */ 2685 case '\'': 2686 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t)); 2687 lex_strterm->term = c; 2688 lex_strterm->can_embed = space_seen; 2689 lex_strterm->token = token_string; 2690 lex_strterm->word = NULL; 2691 lex_strterm->nestable = 0; 2692 lex_strterm->paren = 0; 2693 return tSTRING_BEG; 2694 case '\\': 2695 if (nextc() == '\n') { 2696 space_seen = 1; 2697 goto retry; 2698 } 2699 pushback(); 2700 return c; 2701 case '(': 2702 if (IS_BEG()) 2703 c = tLPAREN; 2704 else if (IS_SPCARG(-1)) 2705 c = tLPAREN_ARG; 2706 parser->paren_nest++; 2707 lex_state = EXPR_BEG; 2708 COND_PUSH(0); 2709 CMDARG_PUSH(0); 2710 return c; 2711 case ')': 2712 parser->paren_nest--; 2713 lex_state = EXPR_ENDFN; 2714 CMDARG_LEXPOP(); 2715 COND_LEXPOP(); 2716 return c; 2717 case '{': 2718 if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) { 2719 lex_state = EXPR_BEG; 2720 parser->lpar_beg = 0; 2721 parser->paren_nest--; 2722 COND_PUSH(0); 2723 CMDARG_PUSH(0); 2724 push_pos(parser, tokp); 2725 if (parser->version < ruby19) { 2726 yywarning("\"->\" syntax is only available in Ruby 1.9.x or higher."); 2727 } 2728 return tLAMBEG; /* this is a lambda ->() {} construction */ 2729 } 2730 if (IS_ARG() || IS_lex_state(EXPR_END | EXPR_ENDFN)) 2731 c = '{'; 2732 else if (IS_lex_state(EXPR_ENDARG)) 2733 c = tLBRACE_ARG; /* block (expr) */ 2734 else 2735 c = tLBRACE; /* smells like a hash */ 2736 COND_PUSH(0); 2737 CMDARG_PUSH(0); 2738 lex_state = EXPR_BEG; 2739 if (c != tLBRACE) { 2740 push_pos(parser, tokp); 2741 command_start = 1; 2742 } 2743 return c; /* block (primary) */ 2744 case '}': 2745 CMDARG_LEXPOP(); 2746 COND_LEXPOP(); 2747 tokp.end_line = parser->line; 2748 tokp.end_col = parser->column; 2749 push_pos(parser, tokp); 2750 lex_state = EXPR_ENDARG; 2751 return c; 2752 case '@': 2753 cp = lexbuf; 2754 *cp++ = c; 2755 c = nextc(); 2756 
if (c != '@') { 2757 bc = IVAR; 2758 } else { 2759 *cp++ = c; 2760 c = nextc(); 2761 bc = CVAR; 2762 } 2763 goto talpha; 2764 case '$': 2765 tokp.end_line = parser->line; 2766 cp = lexbuf; 2767 *cp++ = c; 2768 bc = nextc(); 2769 switch (bc) { 2770 case '1': case '2': case '3': case '4': 2771 case '5': case '6': case '7': case '8': case '9': 2772 c = bc; 2773 while (isdigit(c)) { 2774 *cp++ = c; 2775 c = nextc(); 2776 } 2777 *cp = '\0'; 2778 pushback(); 2779 c = tNTH_REF; 2780 break; 2781 case '~': case '*': case '$': case '?': case '!': case '@': 2782 case '/': case '\\': case ';': case ',': case '.': case '=': 2783 case ':': case '<': case '>': case '\"': 2784 case '&': case '`': case '\'': case '+': 2785 case '0': 2786 c = GLOBAL; 2787 *cp++ = bc; 2788 *cp = '\0'; 2789 break; 2790 case '-': 2791 c = nextc(); 2792 *cp++ = bc; 2793 bc = GLOBAL; 2794 goto talpha; 2795 default: 2796 c = bc; 2797 bc = GLOBAL; 2798 goto talpha; 2799 } 2800 lex_state = EXPR_END; 2801 tokp.end_col = parser->column; 2802 push_pos(parser, tokp); 2803 push_stack(parser, lexbuf); 2804 return c; 2805 case '~': 2806 if (IS_AFTER_OPERATOR()) { 2807 bc = nextc(); 2808 if (bc != '@') 2809 pushback(); 2810 lex_state = EXPR_ARG; 2811 } else 2812 lex_state = EXPR_BEG; 2813 return c; 2814 default: 2815 cp = lexbuf; 2816 goto talpha; 2817 } 2818 pushback(); 2819 return c; 2820 2821 talpha: 2822 { 2823 int step = 0; 2824 int ax = 0; 2825 2826 /* It's time to parse the word */ 2827 while (not_sep(parser->lex_prev)) { 2828 step = utf8_charsize(parser->lex_prev); 2829 ax += step - 1; 2830 while (step-- > 0) { 2831 *cp++ = c; 2832 c = nextc(); 2833 } 2834 if (c < 0) { 2835 parser->eof_reached = 1; 2836 break; 2837 } 2838 } 2839 *cp = '\0'; 2840 parser->column -= ax; 2841 tokp.end_line = tokp.start_line; 2842 tokp.end_col = parser->lex_prevc - ax; 2843 pushback(); 2844 2845 /* IVAR, CVAR, GLOBAL */ 2846 if (bc > 0) { 2847 push_pos(parser, tokp); 2848 push_stack(parser, lexbuf); 2849 lex_state = EXPR_END; 
2850 return bc; 2851 } 2852 2853 /* Check for '!', '?' and '=' at the end of the word */ 2854 if (c == '!' || c == '?') { 2855 *cp++ = c; 2856 *cp = '\0'; 2857 tokp.end_col++; 2858 nextc(); 2859 c = BASE; 2860 } else { 2861 c = 0; 2862 if (IS_lex_state(EXPR_FNAME)) { 2863 bc = nextc(); 2864 if (bc == '=') { 2865 bc = nextc(); 2866 if (bc != '>') { 2867 *cp++ = '='; 2868 *cp = '\0'; 2869 tokp.end_col++; 2870 c = BASE; 2871 } else 2872 pushback(); 2873 } 2874 pushback(); 2875 } 2876 c = (!c && isupper(lexbuf[0])) ? CONST : BASE; 2877 } 2878 2879 /* Check if this is just a hash key. */ 2880 if (IS_LABEL_POSSIBLE() && IS_LABEL_SUFFIX()) { 2881 lex_state = EXPR_BEG; 2882 nextc(); 2883 push_stack(parser, lexbuf); 2884 push_pos(parser, tokp); 2885 return tKEY; 2886 } 2887 2888 /* Check if this is a keyword */ 2889 const struct kwtable *kw = NULL; 2890 if (!IS_lex_state(EXPR_DOT)) { 2891 kw = rb_reserved_word(lexbuf, cp - lexbuf); 2892 if (kw) { 2893 enum lex_state_e state = lex_state; 2894 lex_state = kw->state; 2895 if (state == EXPR_FNAME) 2896 return kw->id[0]; 2897 if (lex_state == EXPR_BEG) 2898 command_start = 1; 2899 switch (kw->id[0]) { 2900 case tCLASS: case tMODULE: case tDEF: 2901 push_last_comment(parser); 2902 break; 2903 case tDO: 2904 if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) { 2905 parser->lpar_beg = 0; 2906 parser->paren_nest--; 2907 return tDO_LAMBDA; 2908 } 2909 if (COND_P()) 2910 return tDO_COND; 2911 push_pos(parser, tokp); 2912 if (CMDARG_P() && state != EXPR_CMDARG) 2913 return tDO_BLOCK; 2914 return tDO; 2915 case tEND: 2916 push_pos(parser, tokp); 2917 break; 2918 } 2919 if (IS_lex_state_for(state, EXPR_BEG | EXPR_VALUE)) 2920 return kw->id[0]; 2921 else { 2922 if (kw->id[0] != kw->id[1]) 2923 lex_state = EXPR_BEG; 2924 return kw->id[1]; 2925 } 2926 } 2927 } 2928 2929 /* Maybe this is just some special method */ 2930 if (is_special_method(lexbuf)) { 2931 if (!strcmp(lexbuf, "__END__")) { 2932 parser->eof_reached = 1; 2933 
return tpEND; 2934 } 2935 } 2936 2937 /* If this is not a keyword, push its position and the name */ 2938 if (!kw) { 2939 push_stack(parser, lexbuf); 2940 push_pos(parser, tokp); 2941 } 2942 2943 /* Update the state of the lexer */ 2944 if (IS_lex_state(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) 2945 lex_state = (cmd_state) ? EXPR_CMDARG : EXPR_ARG; 2946 else if (lex_state == EXPR_FNAME) 2947 lex_state = EXPR_ENDFN; 2948 else 2949 lex_state = EXPR_END; 2950 return c; 2951 } 2952 2953 tnum: 2954 { 2955 char hex, bin, has_point, aux; 2956 hex = bin = has_point = aux = 0; 2957 2958 lex_state = EXPR_END; 2959 if (c == '0') { 2960 bc = nextc(); 2961 if (toupper(bc) == 'X') { 2962 hex = 1; 2963 c = nextc(); 2964 } else if (toupper(bc) == 'B') { 2965 bin = 1; 2966 c = nextc(); 2967 } 2968 pushback(); 2969 } 2970 while (c > 0 && ((isdigit(c) && !bin) || (!hex && !bin && !has_point && c == '.') 2971 || (hex && toupper(c) >= 'A' && toupper(c) < 'G') 2972 || (bin && (c == '1' || c == '0')) || c == '_')) { 2973 if (c == '.') { 2974 if (!isdigit(*parser->lex_p)) { 2975 tokp.end_line = parser->line; 2976 tokp.end_col = parser->column - 1; 2977 pushback(); 2978 return tINTEGER; 2979 } 2980 has_point = 1; 2981 } 2982 aux = 1; 2983 c = nextc(); 2984 } 2985 if ((bin || hex) && !aux) 2986 yyerror(parser, "numeric literal without digits"); 2987 2988 /* is it an exponential number ? */ 2989 if (!bin && !hex && toupper(c) == 'E') { 2990 c = nextc(); 2991 if (isdigit(c) || ((c == '+' || c == '-') && isdigit(*(parser->lex_p)))) 2992 c = nextc(); 2993 while (c != -1 && isdigit(c)) 2994 c = nextc(); 2995 } 2996 2997 if (c != -1) 2998 pushback(); 2999 tokp.end_line = parser->line + 1; 3000 tokp.end_col = parser->column + 1; 3001 if (c == 'r') { 3002 nextc(); 3003 return tRATIONAL; 3004 } else if (c == 'i') { 3005 nextc(); 3006 return tIMAGINARY; 3007 } 3008 tokp.end_line--; 3009 tokp.end_col--; 3010 return (has_point) ? tFLOAT : tINTEGER; 3011 } 3012 } 3013 3014 /* Standard yylex. 
*/ 3015 #if YYPURE 3016 static int yylex(void *lval, void *p) 3017 #else 3018 static int yylex(void *p) 3019 #endif 3020 { 3021 struct parser_t *parser = (struct parser_t *) p; 3022 int t = token_invalid; 3023 _unused_(lval); 3024 3025 t = parser_yylex(parser); 3026 return t; 3027 } 3028 3029 /* 3030 * Error handling. Take the formmated string s and append the error 3031 * string to the list of errors p->errors. 3032 */ 3033 static void yyerror(struct parser_t *parser, const char *s) 3034 { 3035 struct error_t *e = (struct error_t *) malloc(sizeof(struct error_t)); 3036 3037 e->msg = strdup(s); 3038 e->line = parser->line; 3039 e->column = parser->column; 3040 e->warning = parser->warning; 3041 e->next = e; 3042 if (parser->errors) 3043 parser->last_error->next = e; 3044 else 3045 parser->errors = e; 3046 parser->last_error = e; 3047 parser->last_error->next = NULL; 3048 3049 parser->eof_reached = !e->warning; 3050 if (!parser->unrecoverable) 3051 parser->unrecoverable = !e->warning; 3052 } 3053 3054 struct ast_t * rb_compile_file(struct options_t *opts) 3055 { 3056 struct parser_t p; 3057 struct ast_t *result; 3058 3059 /* Initialize parser */ 3060 init_parser(&p); 3061 p.version = opts->version; 3062 if (!opts->contents) { 3063 if (!retrieve_source(&p, opts->path)) 3064 return NULL; 3065 } else { 3066 p.content_given = 1; 3067 p.length = strlen(opts->contents); 3068 p.blob = opts->contents; 3069 p.lex_p = opts->contents; 3070 } 3071 3072 /* Let's parse */ 3073 result = (struct ast_t *) malloc(sizeof(struct ast_t)); 3074 result->tree = NULL; 3075 result->unrecoverable = 0; 3076 for (;;) { 3077 yyparse(&p); 3078 if (p.ast != NULL) { 3079 if (result->tree == NULL) 3080 result->tree = p.ast; 3081 else 3082 update_list(result->tree, p.ast); 3083 } 3084 if (p.eof_reached) { 3085 result->errors = p.errors; 3086 result->unrecoverable = p.unrecoverable; 3087 break; 3088 } 3089 } 3090 free_parser(&p); 3091 3092 return result; 3093 } 3094 3095 #ifdef BUILD_TESTS 3096 /* 
3097 * Compile a file like the rb_compile_file function but printing 3098 * things directly to the stdout. This function is used for the tests. 3099 */ 3100 KDEVRUBYPARSER_EXPORT int rb_debug_file(struct options_t *opts); 3101 3102 int rb_debug_file(struct options_t *opts) 3103 { 3104 struct parser_t p; 3105 int index; 3106 3107 /* Set up parser */ 3108 init_parser(&p); 3109 p.version = opts->version; 3110 if (!retrieve_source(&p, opts->path)) 3111 return 0; 3112 3113 printf("Resulting AST's:"); 3114 for (;;) { 3115 printf("\n"); 3116 yyparse(&p); 3117 print_node(p.ast); 3118 if (p.ast != NULL) { 3119 if (p.ast->cond != NULL) { 3120 printf("\nCondition: "); 3121 print_node(p.ast->cond); 3122 } 3123 if (p.ast->l != NULL && p.ast->l->ensure != NULL) { 3124 if (p.ast->l->cond != NULL) { 3125 printf("\nCondition: "); 3126 print_node(p.ast->l->cond); 3127 } 3128 printf("\nEnsure: "); 3129 print_node(p.ast->l->ensure); 3130 } 3131 free_ast(p.ast); 3132 p.ast = NULL; 3133 } 3134 if (p.eof_reached) { 3135 if (p.errors) 3136 print_errors(p.errors); 3137 break; 3138 } 3139 } 3140 3141 /* Check that all the stacks are empty */ 3142 for (index = 0; index < p.sp; index++) 3143 printf("\nS: %s", p.stack[index]); 3144 printf("\n"); 3145 3146 for (index = 0; index < p.pos_size; index++) 3147 printf("\nP: %i:%i", p.pos_stack[index].start_line, p.pos_stack[index].start_col); 3148 printf("\n"); 3149 free_parser(&p); 3150 return 1; 3151 } 3152 #endif