Warning, /kdevelop/kdev-ruby/parser/parser.y is written in an unsupported language. File is not indexed.
0001 /* This file is part of KDevelop
0002 *
0003 * This file is based on the file parse.y from the MRI, version 1.9.2-p136.
0004 * So, at this point I must recognize the amazing job the Ruby developers
0005 * are doing, and especially Yukihiro Matsumoto, the original author of Ruby
0006 * and the one who signed parse.y.
0007 *
0008 * Copyright (C) 1993-2007 Yukihiro Matsumoto
0009 * Copyright (C) 2010-2015 Miquel Sabaté Solà <mikisabate@gmail.com>
0010 *
0011 * This program is free software: you can redistribute it and/or modify
0012 * it under the terms of the GNU General Public License as published by
0013 * the Free Software Foundation, either version 3 of the License, or
0014 * (at your option) any later version.
0015 *
0016 * This program is distributed in the hope that it will be useful,
0017 * but WITHOUT ANY WARRANTY; without even the implied warranty of
0018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0019 * GNU General Public License for more details.
0020 *
0021 * You should have received a copy of the GNU General Public License
0022 * along with this program. If not, see <http://www.gnu.org/licenses/>.
0023 */
0024
0025
0026 %{
0027 /* make clang happy */
0028 #ifndef _MSC_VER
0029 extern char *strdup(const char *s);
0030 #endif
0031
0032 /* for alloca */
0033 #ifdef _WIN32
0034 #include <malloc.h>
0035 #elif !defined(__FreeBSD__) /* alloca() on FreeBSD is in stdlib.h (included later) */
0036 #include <alloca.h>
0037 #endif
0038
0039 #include <stdio.h>
0040 #include <stdlib.h>
0041 #include <string.h>
0042
0043 #include "node.h"
0044
0045
0046 #define SSIZE 256
0047 #define LSIZE (SSIZE << 2)
0048
0049
0050 /* The state bits, as defined below, have been extracted from the MRI. */
0051 enum lex_state_bits { /* Bit index of each lexer state; enum lex_state_e turns index N into the mask (1 << N). */
0052 EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
0053 EXPR_END_bit, /* newline significant, +/- is an operator. */
0054 EXPR_ENDARG_bit, /* ditto, and unbound braces. */
0055 EXPR_ENDFN_bit, /* ditto, and unbound braces. */
0056 EXPR_ARG_bit, /* newline significant, +/- is an operator. */
0057 EXPR_CMDARG_bit, /* newline significant, +/- is an operator. */
0058 EXPR_MID_bit, /* newline significant, +/- is an operator. */
0059 EXPR_FNAME_bit, /* ignore newline, no reserved words. */
0060 EXPR_DOT_bit, /* right after `.' or `::', no reserved words. */
0061 EXPR_CLASS_bit, /* immediately after `class', no here document. */
0062 EXPR_VALUE_bit, /* like EXPR_BEG but label is disallowed. */
0063 };
0064
0065 /* This enum defines the states in which the lexer can be. */
0066 enum lex_state_e { /* One single-bit mask per state, plus OR-ed groups that can be tested with IS_lex_state(). */
0067 #define DEF_EXPR(n) EXPR_##n = (1 << EXPR_##n##_bit) /* DEF_EXPR(BEG) yields: EXPR_BEG = (1 << EXPR_BEG_bit) */
0068 DEF_EXPR(BEG),
0069 DEF_EXPR(END),
0070 DEF_EXPR(ENDARG),
0071 DEF_EXPR(ENDFN),
0072 DEF_EXPR(ARG),
0073 DEF_EXPR(CMDARG),
0074 DEF_EXPR(MID),
0075 DEF_EXPR(FNAME),
0076 DEF_EXPR(DOT),
0077 DEF_EXPR(CLASS),
0078 DEF_EXPR(VALUE),
0079 EXPR_BEG_ANY = (EXPR_BEG | EXPR_VALUE | EXPR_MID | EXPR_CLASS), /* matches BEG, VALUE, MID or CLASS */
0080 EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG), /* matches ARG or CMDARG */
0081 EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) /* matches END, ENDARG or ENDFN */
0082 };
0083
0084 /* Helper macros for handling the lexer states. */
0085 #define IS_lex_state_for(x, ls) ((x) & (ls)) /* Non-zero when state x shares at least one bit with the mask ls. */
0086 #define IS_lex_state(ls) IS_lex_state_for(lex_state, (ls)) /* Same test on the current state; `lex_state' is itself a macro for parser->lex_state, so a `parser' pointer must be in scope. */
0087
0088 /* And now some macros that will help us on some stacks of the parser. */
0089 #define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1)) /* Shift left and store the low bit of n as the new top (bit 0); bits shifted past the width of `stack' are lost. */
0090 #define BITSTACK_POP(stack) ((stack) = (stack) >> 1) /* Drop the top bit. */
0091 #define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1)) /* Drop the top bit, but OR its value into the bit that becomes the new top. */
0092 #define BITSTACK_SET_P(stack) ((stack)&1) /* Value of the top bit (0 or 1). */
0093
0094 #define COND_PUSH(n) BITSTACK_PUSH(parser->cond_stack, (n)) /* The four COND_* macros apply the BITSTACK_* operations to parser->cond_stack. */
0095 #define COND_POP() BITSTACK_POP(parser->cond_stack)
0096 #define COND_LEXPOP() BITSTACK_LEXPOP(parser->cond_stack)
0097 #define COND_P() BITSTACK_SET_P(parser->cond_stack)
0098
0099 #define CMDARG_PUSH(n) BITSTACK_PUSH(parser->cmdarg_stack, (n)) /* The four CMDARG_* macros apply the same operations to parser->cmdarg_stack. */
0100 #define CMDARG_POP() BITSTACK_POP(parser->cmdarg_stack)
0101 #define CMDARG_LEXPOP() BITSTACK_LEXPOP(parser->cmdarg_stack)
0102 #define CMDARG_P() BITSTACK_SET_P(parser->cmdarg_stack)
0103
0104
0105 /*
0106 * This structure represents a string/heredoc/regexp/shortcut term.
0107 */
0108 struct term_t { /* Also the type of the `term' member of the parser's %union. */
0109 int token; /* NOTE(review): presumably the token kind of the literal being read — confirm in the lexer */
0110 char *word; /* NOTE(review): ownership of this string is not visible here — confirm who frees it */
0111 int nest; /* NOTE(review): presumably the current delimiter nesting depth — confirm */
0112 unsigned char term; /* NOTE(review): presumably the closing delimiter character — confirm */
0113 unsigned char paren; /* NOTE(review): presumably the opening delimiter character — confirm */
0114 unsigned char can_embed : 1; /* one-bit flag */
0115 unsigned char nestable : 1; /* one-bit flag */
0116 };
0117
0118 /*
0119 * This structure contains a comment on the code. It basically stores
0120 * the comment itself in a dynamically allocated char pointer, and the
0121 * line where it was found.
0122 */
0123 struct comment_t {
0124 char *comment; /* dynamically allocated text of the comment */
0125 int line; /* line where the comment was found */
0126 };
0127
0128 /*
0129 * This structure defines all the information that the parser has.
0130 * It contains the AST, flags, stacks, etc.
0131 */
0132 struct parser_t {
0133 /* Abstract Syntax Tree */
0134 struct Node *ast; /* assigned by the top_compstmt rule when a top-level statement is reduced */
0135
0136 /* Stack of positions */
0137 struct pos_t *pos_stack;
0138 int stack_scale;
0139 int pos_size;
0140
0141 /* Flags used by the parser */
0142 unsigned char eof_reached : 1;
0143 unsigned int cond_stack; /* bit stack driven by the COND_* macros; the while/until/for rules push 1 around their condition */
0144 unsigned int cmdarg_stack; /* bit stack driven by the CMDARG_* macros; saved and restored by the command_args rule */
0145 int in_def; /* incremented/decremented by the tDEF rules; non-zero makes BEGIN, class, module and `::CONST' assignment report an error */
0146 int paren_nest;
0147 int lpar_beg;
0148 int parser_command_start; /* reachable as `command_start' through the macro defined after this struct */
0149 enum ruby_version version;
0150
0151 /* Stuff from the lexer */
0152 enum lex_state_e lex_state; /* reachable as plain `lex_state' through the macro defined after this struct */
0153 struct term_t *lex_strterm; /* likewise reachable as plain `lex_strterm' */
0154 char *lex_p;
0155 char *lex_prev;
0156 char *lex_pend;
0157 unsigned long lex_prevc;
0158
0159 /* Basically used to handle heredocs properly */
0160 unsigned long line_pend;
0161 unsigned long column_pend;
0162 unsigned char here_found : 1;
0163
0164 /* Errors on the file */
0165 struct error_t *errors;
0166 struct error_t *last_error;
0167 unsigned char warning : 1; /* set to 1 by yywarning() for the duration of its yyerror() call, then cleared */
0168 unsigned char unrecoverable : 1;
0169
0170 /* Stack of names */
0171 char *stack[2];
0172 char *aux; /* operator spelling strdup'ed by copy_op(); the fname rule stores this same pointer as the node name */
0173 int sp;
0174
0175 /* The last allocated comment + the comment stack */
0176 struct comment_t last_comment;
0177 char *comment_stack[SSIZE]; /* SSIZE is 256 */
0178 int comment_index;
0179
0180 /* Info about the content to parse */
0181 unsigned long length;
0182 unsigned long line; /* read by the bodystmt and fname rules to fill node positions */
0183 unsigned long column; /* read by the bodystmt and fname rules to fill node positions */
0184 unsigned char content_given : 1;
0185 char *blob;
0186 };
0187
0188 #include "parser_gen.h"
0189 #define yyparse ruby_yyparse
0190 #define YYERROR_VERBOSE 1
0191
0192 /* Macros to access some attributes in a fancier way. */
0193 #define lex_strterm parser->lex_strterm /* These three aliases only expand correctly where a variable named `parser' is in scope. */
0194 #define lex_state parser->lex_state /* A macro's own name is not re-expanded inside its replacement, so the alias may share the member's name. */
0195 #define command_start parser->parser_command_start
0196
0197 /* yy's functions */
0198 #if YYPURE
0199 static int yylex(void *, void *);
0200 #else
0201 static int yylex(void *);
0202 #endif
0203 static void yyerror(struct parser_t *, const char *);
0204 #define yywarning(msg) { parser->warning = 1; yyerror(parser, (msg)); parser->warning = 0;} /* Reports msg through yyerror() with parser->warning raised for the duration of the call. Expands to a brace block, so `if (c) yywarning(m); else ...' does not parse; needs `parser' in scope. */
0205
0206 /* The static functions below deal with stacks. */
0207 static void pop_stack(struct parser_t *parser, struct Node *n);
0208 static void push_last_comment(struct parser_t *parser);
0209 static void pop_comment(struct parser_t *parser, struct Node *n);
0210 static void pop_pos(struct parser_t *parser, struct Node *n);
0211 static void pop_start(struct parser_t *parser, struct Node *n);
0212 static void pop_end(struct parser_t *parser, struct Node *n);
0213
0214 /* Helper macros for nodes, positions and stacks */
0215 #define ALLOC_N(kind, l, r) alloc_node(kind, l, r); pop_pos(parser, yyval.n); /* Expands to TWO statements: written as `$$ = ALLOC_N(...);' the node is first assigned to the rule value, then pop_pos() is called on yyval.n. Unsafe as the unbraced body of an if/else. */
0216 #define DISPOSE2(node1, node2) { free_ast(node1); free_ast(node2); } /* Calls free_ast() on both nodes. */
0217 #define DISPOSE3(node1, node2, node3) { DISPOSE2(node1, node2); free_ast(node3); } /* Calls free_ast() on all three nodes. */
0218 #define POP_STACK pop_stack(parser, yyval.n) /* Calls pop_stack() for the node currently held in yyval.n. */
0219 #define discard_pos() pop_pos(parser, NULL) /* Calls pop_pos() without a node; presumably drops one saved position (used after tEND, '{' and '}') — confirm against pop_pos(). */
0220 #define copy_op(op) { parser->aux = strdup(op); } /* Stores a heap copy of op in parser->aux. The previous pointer is overwritten without being freed and the strdup() result is not checked. */
0221 %}
0222
0223 %pure-parser /* Requests a pure (reentrant) parser; the yylex prototype in the prologue is selected with `#if YYPURE'. */
0224 %lex-param {struct parser_t *parser } /* extra argument passed to yylex() */
0225 %parse-param { struct parser_t *parser } /* extra argument of yyparse() and yyerror() */
0226 %union {
0227 struct Node *n; /* AST node; every symbol listed under %type <n> uses this member */
0228 int num; /* scratch integer for mid-rule actions (see bodystmt and command_args) */
0229 struct term_t *term;
0230 }
0231
0232 /* Tokens */
0233 %token tCLASS tMODULE tDEF tUNDEF tBEGIN tRESCUE tENSURE tEND tIF tUNLESS
0234 %token tTHEN tELSIF tELSE tCASE tWHEN tWHILE tUNTIL tFOR tBREAK tNEXT tREDO
0235 %token tRETRY tIN tDO tDO_COND tDO_BLOCK tRETURN tYIELD tKWAND tKWOR tKWNOT
0236 %token tALIAS tDEFINED upBEGIN upEND tTRUE tFALSE tNIL tENCODING tDSTAR
0237 %token tFILE tLINE tSELF tSUPER GLOBAL BASE CONST tDO_LAMBDA tCHAR tIMAGINARY
0238 %token IVAR CVAR tINTEGER tFLOAT tNTH_REF tBACKTICK tpEND tSYMBEG tRATIONAL
0239 %token tAMPER tAREF tASET tASSOC tCOLON2 tCOLON3 tLAMBDA tLAMBEG tLBRACE
0240 %token tLBRACKET tLPAREN tLPAREN_ARG tSTAR tCOMMENT ARRAY tKEY SYMBOL tUMINUS_NUM
0241 %token tSTRING_BEG tSTRING_CONTENT tSTRING_DBEG tSTRING_DEND tSTRING_END tSTRING_DVAR
0242
0243 /* Types */
0244 %type <n> singleton strings string literal numeric cpath rescue_arg
0245 %type <n> top_compstmt top_stmt bodystmt compstmt stmts stmt expr arg primary
0246 %type <n> command command_call method_call if_tail opt_else case_body cases
0247 %type <n> opt_rescue exc_list exc_var opt_ensure args call_args opt_call_args
0248 %type <n> paren_args opt_paren_args super aref_args opt_block_arg block_arg
0249 %type <n> mrhs superclass block_call block_command f_block_optarg f_block_opt
0250 %type <n> const f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list
0251 %type <n> f_margs assoc_list assocs assoc undef_list backref for_var bvar base
0252 %type <n> block_param opt_block_param block_param_def f_opt bv_decls label none
0253 %type <n> lambda f_larglist lambda_body command_args opt_bv_decl lhs do_block
0254 %type <n> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner
0255 %type <n> fsym variable symbol operation operation2 operation3 other_vars
0256 %type <n> cname fname f_rest_arg f_block_arg opt_f_block_arg f_norm_arg
0257 %type <n> brace_block cmd_brace_block f_bad_arg sym opt_brace_block
0258 %type <n> opt_args_tail args_tail f_kwarg block_args_tail opt_block_args_tail
0259 %type <n> f_kw f_block_kw f_block_kwarg f_kwrest simple_numeric
0260 %type <n> string_contents string_content string_dvar
0261
0262 /* When an error has been found, free all the nodes from bison's stacks */
0263 %destructor { free_ast($$); } <n>
0264
0265 /* precedence table */
0266 %nonassoc tLOWEST
0267 %nonassoc tLBRACE_ARG
0268
0269 %nonassoc modifier_if modifier_unless modifier_while modifier_until
0270 %left tKWOR tKWAND
0271 %right tKWNOT
0272 %nonassoc tDEFINED
0273 %right '=' tOP_ASGN
0274 %left modifier_rescue
0275 %right '?' ':'
0276 %nonassoc tDOT2 tDOT3
0277 %left tOR
0278 %left tAND
0279 %nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
0280 %left '>' tGEQ '<' tLEQ
0281 %left '|' '^'
0282 %left '&'
0283 %left tLSHIFT tRSHIFT
0284 %left '+' '-'
0285 %left '*' '/' '%'
0286 %right tUMINUS_NUM tUMINUS
0287 %right tPOW
0288 %right '!' '~' tUPLUS
0289
0290 %%
0291
0292 top_compstmt: top_stmt { parser->ast = $1; $$ = 0; YYACCEPT; } /* First rule of the grammar: stores the statement in parser->ast and makes yyparse() return at once. */
0293 | term { $$ = 0; YYACCEPT; } /* A lone terminator also ends the call; parser->ast is not assigned here. */
0294 ;
0295
0296 top_stmt: none /* A top-level statement: nothing, a stmt, or a stmt reached after error recovery. */
0297 | stmt
0298 | error stmt { $$ = $2; } /* after the `error' token, the value is the statement that follows it */
0299 ;
0300
0301 bodystmt: /* Statement body with optional rescue, else and ensure parts. */
0302 {
0303 $<num>$ = parser->line; /* mid-rule value $1: parser->line when the body is entered */
0304 }
0305 {
0306 $<num>$ = parser->column; /* mid-rule value $2: parser->column when the body is entered */
0307 }
0308 compstmt opt_rescue opt_else opt_ensure
0309 {
0310 $$ = alloc_ensure(token_body, $3, $4, $5, $6); /* the two mid-rule actions occupy $1 and $2, so the symbols start at $3 */
0311 pop_end(parser, $$); /* Every bodystmt ends with tEND */
0312 $$->pos.start_line = $<num>1; /* start position comes from the two mid-rule actions above */
0313 $$->pos.start_col = $<num>2;
0314 }
0315 ;
0316
0317 compstmt: stmts opt_terms { $$ = $1; } /* Statement list followed by optional terminators; the value is the list. */
0318 ;
0319
0320 stmts: none /* List of statements separated by terminators. */
0321 | stmt
0322 | stmts terms stmt { $$ = ($1 == NULL) ? $3 : update_list($1, $3); } /* append $3; when $1 is NULL the new statement becomes the list */
0323 | error stmt { $$ = $2; } /* after the `error' token, the value is the statement that follows it */
0324 ;
0325
0326 stmt: tALIAS fsym { lex_state = EXPR_FNAME; } fsym
0327 {
0328 $$ = alloc_node(token_alias, $2, $4);
0329 }
0330 | tALIAS GLOBAL GLOBAL
0331 {
0332 /* Ugly as hell, but it works */
0333 struct Node *l = alloc_node(token_object, NULL, NULL);
0334 l->flags = global;
0335 struct Node *r = alloc_node(token_object, NULL, NULL);
0336 r->flags = global;
0337 pop_pos(parser, r);
0338 pop_pos(parser, l);
0339 pop_stack(parser, l);
0340 pop_stack(parser, r);
0341 $$ = alloc_node(token_alias, l, r);
0342 }
0343 | tALIAS GLOBAL tNTH_REF
0344 {
0345 yyerror(parser, "can't make alias for the number variables");
0346 $$ = 0;
0347 }
0348 | tUNDEF undef_list /* undef statement: the list of names becomes the right child of the node */
0349 {
0350 $$ = alloc_node(token_undef, NULL, $2);
0351 }
0352 | stmt modifier_if expr
0353 {
0354 $$ = alloc_cond(token_if, $3, $1, NULL);
0355 }
0356 | stmt modifier_unless expr
0357 {
0358 $$ = alloc_cond(token_unless, $3, $1, NULL);
0359 }
0360 | stmt modifier_while expr
0361 {
0362 $$ = alloc_cond(token_while, $3, $1, NULL);
0363 }
0364 | stmt modifier_until expr
0365 {
0366 $$ = alloc_cond(token_until, $3, $1, NULL);
0367 }
0368 | stmt modifier_rescue stmt
0369 {
0370 $$ = alloc_cond(token_rescue, $3, $1, NULL);
0371 }
0372 | upBEGIN
0373 {
0374 if (parser->in_def)
0375 yyerror(parser, "BEGIN in method");
0376 }
0377 '{' compstmt '}'
0378 {
0379 $$ = alloc_node(token_up_begin, $4, NULL);
0380 discard_pos(); /* } */
0381 discard_pos(); /* { */
0382 }
0383 | upEND '{' compstmt '}'
0384 {
0385 $$ = alloc_node(token_up_end, $3, NULL);
0386 discard_pos(); /* } */
0387 discard_pos(); /* { */
0388 }
0389 | lhs '=' command_call { $$ = alloc_node(token_assign, $1, $3); }
0390 | mlhs '=' command_call { $$ = alloc_node(token_assign, $1, $3); }
0391 | variable tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
0392 | primary '[' opt_call_args rbracket tOP_ASGN command_call
0393 {
0394 struct Node *aux = alloc_node(token_array_value, $1, $3);
0395 $$ = alloc_node(token_op_assign, aux, $6);
0396 }
0397 | primary '.' base tOP_ASGN command_call
0398 {
0399 struct Node *aux = alloc_node(token_object, $1, $3);
0400 $$ = alloc_node(token_op_assign, aux, $5);
0401 }
0402 | primary '.' const tOP_ASGN command_call
0403 {
0404 struct Node *aux = alloc_node(token_object, $1, $3);
0405 $$ = alloc_node(token_op_assign, aux, $5);
0406 }
0407 | primary tCOLON2 const tOP_ASGN command_call
0408 {
0409 yyerror(parser, "constant re-assignment");
0410 $$ = NULL;
0411 DISPOSE3($1, $3, $5);
0412 }
0413 | primary tCOLON2 base tOP_ASGN command_call
0414 {
0415 struct Node *aux = alloc_node(token_object, $1, $3);
0416 $$ = alloc_node(token_op_assign, aux, $5);
0417 }
0418 | backref tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
0419 | lhs '=' mrhs { $$ = alloc_node(token_assign, $1, $3); }
0420 | mlhs '=' arg { $$ = alloc_node(token_assign, $1, $3); }
0421 | mlhs '=' mrhs { $$ = alloc_node(token_assign, $1, $3); }
0422 | expr
0423 | tpEND { $$ = alloc_node(token__end__, NULL, NULL); }
0424 ;
0425
0426 expr: command_call
0427 | expr tKWAND expr { $$ = alloc_node(token_kw_and, $1, $3); }
0428 | expr tKWOR expr { $$ = alloc_node(token_kw_or, $1, $3); }
0429 | tKWNOT opt_eol expr { $$ = alloc_node(token_kw_not, $3, NULL); }
0430 | '!' command_call { $$ = alloc_node(token_not, $2, NULL); }
0431 | arg
0432 ;
0433
0434 command_call: command | block_command
0435 ;
0436
0437 block_command: block_call
0438 | block_call '.' operation2 command_args
0439 {
0440 struct Node *aux = update_list($1, $3);
0441 $$ = alloc_node(token_method_call, aux, $4);
0442 }
0443 | block_call tCOLON2 operation2 command_args
0444 {
0445 struct Node *aux = update_list($1, $3);
0446 $$ = alloc_node(token_method_call, aux, $4);
0447 }
0448 ;
0449
0450 cmd_brace_block: tLBRACE_ARG opt_block_param compstmt '}' /* Brace block that the `command' rule accepts after command_args. */
0451 {
0452 $$ = ALLOC_N(token_block, $3, $2); /* body on the left, parameters on the right; ALLOC_N also calls pop_pos() on the new node */
0453 pop_start(parser, $$);
0454 }
0455 ;
0456
0457 command: operation command_args %prec tLOWEST
0458 {
0459 $$ = alloc_node(token_method_call, $1, $2);
0460 }
0461 | operation command_args cmd_brace_block
0462 {
0463 $$ = alloc_cond(token_method_call, $3, $1, $2);
0464 }
0465 | primary '.' operation2 command_args %prec tLOWEST
0466 {
0467 struct Node *aux = update_list($1, $3);
0468 $$ = alloc_node(token_method_call, aux, $4);
0469 }
0470 | primary '.' operation2 command_args cmd_brace_block
0471 {
0472 struct Node *aux = update_list($1, $3);
0473 $$ = alloc_cond(token_method_call, $5, aux, $4);
0474 }
0475 | primary tCOLON2 operation2 command_args %prec tLOWEST
0476 {
0477 struct Node *aux = update_list($1, $3);
0478 $$ = alloc_node(token_method_call, aux, $4);
0479 }
0480 | primary tCOLON2 operation2 command_args cmd_brace_block
0481 {
0482 struct Node *aux = update_list($1, $3);
0483 $$ = alloc_cond(token_method_call, $5, aux, $4);
0484 }
0485 | tSUPER call_args { $$ = alloc_node(token_method_call, $2, NULL); }
0486 | tYIELD call_args { $$ = alloc_node(token_yield, $2, NULL); }
0487 | tRETURN call_args { $$ = alloc_node(token_return, $2, NULL); }
0488 | tBREAK call_args { $$ = alloc_node(token_break, $2, NULL); }
0489 | tNEXT call_args { $$ = alloc_node(token_next, $2, NULL); }
0490 ;
0491
0492 mlhs: mlhs_basic
0493 | tLPAREN mlhs_inner rparen { $$ = $2; }
0494 ;
0495
0496 mlhs_inner: mlhs_basic
0497 | tLPAREN mlhs_inner rparen { $$ = $2; }
0498 ;
0499
0500 mlhs_basic: mlhs_head
0501 | mlhs_head mlhs_item { $$ = update_list($1, $2); }
0502 | mlhs_head tSTAR mlhs_node
0503 {
0504 $3->flags = kwrest;
0505 $$ = update_list($1, $3);
0506 }
0507 | mlhs_head tSTAR mlhs_node ',' mlhs_post
0508 {
0509 $3->flags = kwrest;
0510 $$ = concat_list($1, update_list($3, $5));
0511 }
0512 | mlhs_head tSTAR
0513 {
0514 $$ = alloc_node(token_object, NULL, NULL);
0515 $$->flags = star;
0516 $$ = update_list($1, $$);
0517 }
0518 | mlhs_head tSTAR ',' mlhs_post
0519 {
0520 $$ = alloc_node(token_object, NULL, NULL);
0521 $$->flags = star;
0522 $$ = update_list($1, $$);
0523 $$ = concat_list($$, $4);
0524 }
0525 | tSTAR mlhs_node { $$ = $2; $$->flags = kwrest; }
0526 | tSTAR mlhs_node ',' mlhs_post { $$ = update_list($2, $4); $2->flags = kwrest; }
0527 | tSTAR
0528 {
0529 $$ = alloc_node(token_object, NULL, NULL);
0530 $$->flags = star;
0531 }
0532 | tSTAR ',' mlhs_post
0533 {
0534 $$ = alloc_node(token_object, NULL, NULL);
0535 $$->flags = star;
0536 $$ = update_list($$, $3);
0537 }
0538 ;
0539
0540 mlhs_item: mlhs_node
0541 | tLPAREN mlhs_inner rparen { $$ = alloc_node(token_object, $2, NULL); }
0542 ;
0543
0544 mlhs_head: mlhs_item ',' { $$ = $1; }
0545 | mlhs_head mlhs_item ',' { $$ = update_list($1, $2); }
0546 ;
0547
0548 mlhs_post: mlhs_item { $$ = $1; }
0549 | mlhs_post ',' mlhs_item { $$ = update_list($1, $3); }
0550 ;
0551
0552 mlhs_node: variable
0553 | primary '[' opt_call_args rbracket
0554 {
0555 $$ = alloc_node(token_array_value, $1, $3);
0556 }
0557 | primary '.' base { $$ = alloc_node(token_method_call, $1, $3); }
0558 | primary tCOLON2 base { $$ = alloc_node(token_method_call, $1, $3); }
0559 | primary '.' const { $$ = alloc_node(token_method_call, $1, $3); }
0560 | primary tCOLON2 const
0561 {
0562 if (parser->in_def)
0563 yyerror(parser, "dynamic constant assignment");
0564 $$ = alloc_node(token_method_call, $1, $3);
0565 }
0566 | tCOLON3 const
0567 {
0568 if (parser->in_def)
0569 yyerror(parser, "dynamic constant assignment");
0570 $$ = $2;
0571 }
0572 | backref
0573 ;
0574
0575 lhs: variable
0576 | primary '[' opt_call_args rbracket
0577 {
0578 $$ = alloc_node(token_array_value, $1, $3);
0579 }
0580 | primary '.' base { $$ = alloc_node(token_method_call, $1, $3); }
0581 | primary tCOLON2 base { $$ = alloc_node(token_method_call, $1, $3); }
0582 | primary '.' const { $$ = alloc_node(token_method_call, $1, $3); }
0583 | primary tCOLON2 const
0584 {
0585 if (parser->in_def)
0586 yyerror(parser, "dynamic constant assignment");
0587 $$ = alloc_node(token_method_call, $1, $3);
0588 }
0589 | tCOLON3 const
0590 {
0591 if (parser->in_def)
0592 yyerror(parser, "dynamic constant assignment");
0593 $$ = $2;
0594 }
0595 ;
0596
0597 cname: BASE
0598 {
0599 yyerror(parser, "class/module name must be CONSTANT");
0600 $$ = 0;
0601 }
0602 | const
0603 ;
0604
0605 cpath: tCOLON3 cname { $$ = $2; }
0606 | cname { $$ = $1; }
0607 | primary tCOLON2 cname { $$ = update_list($1, $3); }
0608 ;
0609
0610 fname: base /* Name accepted after `def' and, through fsym, in `alias' and `undef': identifier, constant, operator or reserved word. */
0611 | const
0612 | op
0613 {
0614 lex_state = EXPR_ENDFN;
0615 $$ = alloc_node(token_object, NULL, NULL);
0616 $$->name = parser->aux; /* operator text stored by the `op' rule via copy_op(); parser->aux keeps pointing at the same string */
0617 $$->pos.start_line = $$->pos.end_line = parser->line;
0618 $$->pos.end_col = parser->column;
0619 $$->pos.start_col = $$->pos.end_col - strlen(parser->aux); /* start = end minus operator length. NOTE(review): parser->aux is NULL here if strdup() failed in copy_op() */
0620 }
0621 | reswords
0622 {
0623 lex_state = EXPR_ENDFN;
0624 $$ = alloc_node(token_object, NULL, NULL); /* NOTE(review): no name or position is assigned in this action — confirm this is intended */
0625 }
0626 ;
0627
0628 fsym: fname | symbol
0629 ;
0630
0631 undef_list: fsym
0632 | undef_list ',' { lex_state = EXPR_FNAME; } fsym { $$ = update_list($1, $4); }
0633 ;
0634
0635 op: '|' { copy_op("|"); } | '^' { copy_op("^"); } | '&' { copy_op("&"); } /* Operator usable as a method name: every alternative stores its spelling in parser->aux, which the fname rule then reads. */
0636 | tCMP { copy_op("<=>"); } | tEQ { copy_op("=="); } | tEQQ { copy_op("===");}
0637 | tMATCH { copy_op("=~"); } | tNMATCH {copy_op("!~");} | '>' { copy_op(">");}
0638 | tGEQ { copy_op(">="); } | '<' { copy_op("<"); } | tLEQ { copy_op("<="); }
0639 | tNEQ {copy_op("!=");} | tLSHIFT {copy_op("<<");} | tRSHIFT {copy_op(">>");}
0640 | '+' { copy_op("+"); } | '-' { copy_op("-"); } | '*' { copy_op("*"); }
0641 | tSTAR { copy_op("*"); } | '/' { copy_op("/"); } | '%' { copy_op("%"); } /* tSTAR is spelled the same as '*' */
0642 | tPOW { copy_op("**"); } | tAREF { copy_op("[]"); } | '`' { copy_op("`");}
0643 | tUPLUS { copy_op("+"); } | tASET { copy_op("[]="); } /* tUPLUS is spelled the same as '+' */
0644 | tUMINUS { copy_op("-"); } | tDSTAR { copy_op("**"); } /* tUMINUS and tDSTAR are spelled like '-' and tPOW */
0645 | '!' { copy_op("!"); } | '~' { copy_op("~"); }
0646 ;
0647
0648 reswords: tLINE | tFILE | tENCODING | upBEGIN | upEND | tALIAS | tKWAND
0649 | tBEGIN | tBREAK | tCASE | tCLASS | tDEF | tDEFINED | tDO | tELSE | tELSIF
0650 | tEND | tENSURE | tFALSE | tFOR | tIN | tMODULE | tNEXT | tNIL | tKWNOT
0651 | tKWOR | tREDO | tRESCUE | tRETRY | tRETURN | tSELF | tSUPER | tTHEN | tTRUE
0652 | tUNDEF | tWHEN | tYIELD | tIF | tUNLESS | tWHILE | tUNTIL
0653 ;
0654
0655 arg: lhs '=' arg { $$ = alloc_node(token_assign, $1, $3); }
0656 | lhs '=' arg modifier_rescue arg
0657 {
0658 struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
0659 $$ = alloc_node(token_assign, $1, aux);
0660 }
0661 | variable tOP_ASGN arg { $$ = alloc_node(token_op_assign, $1, $3); }
0662 | variable tOP_ASGN arg modifier_rescue arg
0663 {
0664 struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
0665 $$ = alloc_node(token_op_assign, $1, aux);
0666 }
0667 | primary '[' opt_call_args rbracket tOP_ASGN arg
0668 {
0669 struct Node *aux = alloc_node(token_array_value, $1, $3);
0670 $$ = alloc_node(token_op_assign, aux, $6);
0671 }
0672 | primary '.' base tOP_ASGN arg
0673 {
0674 struct Node *aux = alloc_node(token_object, $1, $3);
0675 $$ = alloc_node(token_op_assign, aux, $5);
0676 }
0677 | primary '.' const tOP_ASGN arg
0678 {
0679 struct Node *aux = alloc_node(token_object, $1, $3);
0680 $$ = alloc_node(token_op_assign, aux, $5);
0681 }
0682 | primary tCOLON2 base tOP_ASGN arg
0683 {
0684 struct Node *aux = alloc_node(token_object, $1, $3);
0685 $$ = alloc_node(token_op_assign, aux, $5);
0686 }
0687 | primary tCOLON2 const tOP_ASGN arg
0688 {
0689 yyerror(parser, "constant re-assignment");
0690 $$ = NULL;
0691 DISPOSE3($1, $3, $5);
0692 }
0693 | tCOLON3 const tOP_ASGN arg
0694 {
0695 yyerror(parser, "constant re-assignment");
0696 $$ = NULL;
0697 DISPOSE2($2, $4);
0698 }
0699 | backref tOP_ASGN arg { $$ = alloc_node(token_op_assign, $1, $3); } /* compound assignment: same node kind as `variable tOP_ASGN arg' and as `backref tOP_ASGN command_call' in stmt */
0700 | arg tDOT2 arg { $$ = alloc_node(token_dot2, $1, $3); }
0701 | arg tDOT3 arg { $$ = alloc_node(token_dot3, $1, $3);}
0702 | arg '+' arg { $$ = alloc_node(token_plus, $1, $3); }
0703 | arg '-' arg { $$ = alloc_node(token_minus, $1, $3);}
0704 | arg '*' arg { $$ = alloc_node(token_mul, $1, $3);}
0705 | arg '/' arg { $$ = alloc_node(token_div, $1, $3);}
0706 | arg '%' arg { $$ = alloc_node(token_mod, $1, $3);}
0707 | arg tPOW arg { $$ = alloc_node(token_pow, $1, $3);}
0708 | tUMINUS_NUM simple_numeric tPOW arg
0709 {
0710 struct Node *aux = alloc_node(token_pow, $2, $4);
0711 $$ = alloc_node(token_unary_minus, aux, NULL);
0712 }
0713 | tUPLUS arg { $$ = alloc_node(token_unary_plus, $2, NULL); }
0714 | tUMINUS arg { $$ = alloc_node(token_unary_minus, $2, NULL); }
0715 | arg '|' arg { $$ = alloc_node(token_bit_or, $1, $3); }
0716 | arg '^' arg { $$ = alloc_node(token_bit_xor, $1, $3); }
0717 | arg '&' arg { $$ = alloc_node(token_bit_and, $1, $3); }
0718 | arg tCMP arg { $$ = alloc_node(token_cmp, $1, $3); }
0719 | arg '>' arg { $$ = alloc_node(token_greater, $1, $3); }
0720 | arg tGEQ arg { $$ = alloc_node(token_geq, $1, $3); }
0721 | arg '<' arg { $$ = alloc_node(token_lesser, $1, $3); }
0722 | arg tLEQ arg { $$ = alloc_node(token_leq, $1, $3); }
0723 | arg tEQ arg { $$ = alloc_node(token_eq, $1, $3); }
0724 | arg tEQQ arg { $$ = alloc_node(token_eqq, $1, $3); }
0725 | arg tNEQ arg { $$ = alloc_node(token_neq, $1, $3); }
0726 | arg tMATCH arg { $$ = alloc_node(token_match, $1, $3); }
0727 | arg tNMATCH arg { $$ = alloc_node(token_nmatch, $1, $3); }
0728 | '!' arg { $$ = alloc_node(token_not, $2, NULL); }
0729 | '~' arg { $$ = alloc_node(token_neg, $2, NULL); }
0730 | arg tLSHIFT arg { $$ = alloc_node(token_lshift, $1, $3); }
0731 | arg tRSHIFT arg { $$ = alloc_node(token_rshift, $1, $3); }
0732 | arg tAND arg { $$ = alloc_node(token_and, $1, $3); }
0733 | arg tOR arg { $$ = alloc_node(token_or, $1, $3); }
0734 | tDEFINED opt_eol arg { $$ = alloc_node(token_defined, $3, NULL); }
0735 | arg '?' arg opt_eol ':' arg
0736 {
0737 $$ = alloc_cond(token_ternary, $1, $3, $6);
0738 }
0739 | primary
0740 ;
0741
0742 aref_args: none
0743 | args trailer { $$ = $1; }
0744 | args ',' assocs trailer { $$ = update_list($1, $3); }
0745 | assocs trailer { $$ = $1; }
0746 ;
0747
0748 paren_args: '(' opt_call_args rparen { $$ = $2; }
0749 ;
0750
0751 opt_paren_args : none | paren_args
0752 ;
0753
0754 opt_call_args: none | call_args
0755 ;
0756
0757 call_args: command
0758 | args opt_block_arg { $$ = update_list($1, $2); }
0759 | assocs opt_block_arg
0760 {
0761 struct Node *aux = alloc_node(token_hash, $1, NULL);
0762 $$ = update_list(aux, $2);
0763 }
0764 | args ',' assocs opt_block_arg
0765 {
0766 struct Node *aux = alloc_node(token_hash, $3, NULL);
0767 struct Node *n = update_list(aux, $4);
0768 $$ = concat_list($1, n);
0769 }
0770 | block_arg
0771 ;
0772
0773 command_args: /* Arguments of a parenthesis-less call; 1 is pushed on the cmdarg bit stack while they are parsed. */
0774 {
0775 $<num>$ = parser->cmdarg_stack; /* save the whole stack as mid-rule value $1 */
0776 CMDARG_PUSH(1);
0777 } call_args
0778 {
0779 parser->cmdarg_stack = $<num>1; /* restore the value saved by the mid-rule action ($1), not this rule's own result slot */
0780 $$ = $2;
0781 }
0782 ;
0783
0784 block_arg: tAMPER arg { $$ = $2; }
0785 ;
0786
0787 opt_block_arg: ',' block_arg { $$ = $2; }
0788 | ',' { $$ = NULL; }
0789 | none
0790 ;
0791
0792 args: arg
0793 | tSTAR arg { $$ = $2; }
0794 | args ',' arg { $$ = update_list($1, $3); }
0795 | args ',' tSTAR arg { $$ = update_list($1, $4); }
0796 ;
0797
0798 mrhs: args ',' arg { $$ = update_list($1, $3); }
0799 | args ',' tSTAR arg { $$ = update_list($1, $4); }
0800 | tSTAR arg { $$ = $2; }
0801 ;
0802
0803 primary: literal
0804 | strings
0805 | variable
0806 | backref
0807 | tBEGIN bodystmt tEND { $$ = alloc_node(token_begin, $2, NULL); }
0808 | tLPAREN_ARG expr { lex_state = EXPR_ENDARG; } rparen { $$ = $2; }
0809 | tLPAREN compstmt ')' { $$ = $2; }
0810 | primary tCOLON2 const
0811 {
0812 struct Node *aux = update_list($1, $3);
0813 $$ = alloc_node(token_method_call, aux, NULL);
0814 }
0815 | tCOLON3 const { $$ = $2; }
0816 | ARRAY { $$ = alloc_node(token_array, NULL, NULL); }
0817 | tLBRACKET aref_args ']' { $$ = alloc_node(token_array, $2, NULL); }
0818 | tLBRACE assoc_list '}'
0819 {
0820 $$ = alloc_node(token_hash, $2, NULL);
0821 discard_pos();
0822 }
0823 | tRETURN { $$ = alloc_node(token_return, NULL, NULL); }
0824 | tYIELD '(' call_args rparen { $$ = alloc_node(token_yield, $3, NULL); }
0825 | tYIELD '(' rparen { $$ = alloc_node(token_yield, NULL, NULL); }
0826 | tYIELD { $$ = alloc_node(token_yield, NULL, NULL); }
0827 | tDEFINED opt_eol '(' expr rparen
0828 {
0829 $$ = alloc_node(token_defined, $4, NULL);
0830 }
0831 | tKWNOT '(' expr rparen { $$ = alloc_node(token_kw_not, $3, NULL); }
0832 | tKWNOT '(' rparen { $$ = alloc_node(token_kw_not, NULL, NULL); }
0833 | operation brace_block { $$ = alloc_cond(token_method_call, $2, $1, NULL); }
0834 | method_call opt_brace_block
0835 {
0836 $$ = $1;
0837 $$->cond = $2;
0838 }
0839 | tLAMBDA lambda { $$ = alloc_cond(token_method_call, $2, NULL, NULL); }
0840 | tIF expr then compstmt if_tail tEND
0841 {
0842 $$ = alloc_cond(token_if, $2, $4, $5);
0843 discard_pos(); /* tEND */
0844 }
0845 | tUNLESS expr then compstmt opt_else tEND
0846 {
0847 $$ = alloc_cond(token_unless, $2, $4, $5);
0848 discard_pos(); /* tEND */
0849 }
0850 | tWHILE { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
0851 {
0852 $$ = alloc_cond(token_while, $3, $6, NULL);
0853 discard_pos(); /* tEND */
0854 }
0855 | tUNTIL { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND /* until loop: 1 is pushed on the cond bit stack while the condition is parsed */
0856 {
0857 $$ = alloc_cond(token_until, $3, $6, NULL); /* same node kind and child layout as the `stmt modifier_until expr' form */
0858 discard_pos(); /* tEND */
0859 }
0860 | tCASE expr opt_terms case_body tEND
0861 {
0862 $$ = alloc_cond(token_case, $2, $4, NULL);
0863 discard_pos(); /* tEND */
0864 }
0865 | tCASE opt_terms case_body tEND
0866 {
0867 $$ = alloc_node(token_case, $3, NULL);
0868 discard_pos(); /* tEND */
0869 }
0870 | tFOR for_var tIN { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
0871 {
0872 $$ = alloc_cond(token_for, $5, $8, $2);
0873 discard_pos(); /* tEND */
0874 }
0875 | tCLASS cpath superclass
0876 {
0877 if (parser->in_def)
0878 yyerror(parser, "class definition in method body");
0879 }
0880 bodystmt tEND
0881 {
0882 $$ = alloc_cond(token_class, $3, $5, $2);
0883 pop_comment(parser, $$);
0884 }
0885 | tCLASS opt_terms tLSHIFT expr term bodystmt tEND
0886 {
0887 $$ = alloc_node(token_singleton_class, $6, $4);
0888 pop_comment(parser, $$);
0889 }
0890 | tMODULE cpath
0891 {
0892 if (parser->in_def)
0893 yyerror(parser, "module definition in method body");
0894 }
0895 bodystmt tEND
0896 {
0897 $$ = alloc_node(token_module, $4, $2);
0898 pop_comment(parser, $$);
0899 }
0900 | tDEF fname
0901 {
0902 parser->in_def++;
0903 }
0904 f_arglist bodystmt tEND
0905 {
0906 parser->in_def--;
0907 $$ = alloc_cond(token_function, $2, $5, $4);
0908 pop_comment(parser, $$);
0909 }
0910 | tDEF singleton dot_or_colon { lex_state = EXPR_FNAME; } fname
0911 {
0912 lex_state = EXPR_ENDFN;
0913 parser->in_def++;
0914 }
0915 f_arglist bodystmt tEND
0916 {
0917 $$ = alloc_node(token_object, $2, $5);
0918 $$ = alloc_cond(token_function, $$, $8, $7);
0919 $$->flags = 1; /* Class method */
0920 pop_comment(parser, $$);
0921 parser->in_def--;
0922 }
0923 | tBREAK { $$ = alloc_node(token_break, NULL, NULL); }
0924 | tNEXT { $$ = alloc_node(token_next, NULL, NULL); }
0925 | tREDO { $$ = alloc_node(token_redo, NULL, NULL); }
0926 | tRETRY { $$ = alloc_node(token_retry, NULL, NULL); }
0927 ;
0928
0929 then: term
0930 | tTHEN
0931 | term tTHEN
0932 ;
0933
0934 do: term | tDO_COND
0935 ;
0936
0937 if_tail: opt_else
0938 | tELSIF expr then compstmt if_tail
0939 {
0940 $$ = alloc_cond(token_if, $2, $4, $5);
0941 }
0942 ;
0943
0944 opt_else: none
0945 | tELSE compstmt { $$ = alloc_node(token_if, $2, NULL); }
0946 ;
0947
0948 for_var: lhs | mlhs
0949 ;
0950
/* One element of a parenthesized (destructuring) parameter: a plain
 * argument or a nested parenthesized f_margs. */
f_marg: f_norm_arg { $$ = $1; }
    | tLPAREN f_margs rparen { $$ = $2; }
;

/* Comma separated list of f_marg, chained with update_list. */
f_marg_list: f_marg
    | f_marg_list ',' f_marg { $$ = update_list($1, $3); }
;

/* Contents of a parenthesized parameter, with an optional tSTAR element at
 * the start, middle or end. A tSTAR without a name is represented by a
 * fresh, empty token_object node. */
f_margs: f_marg_list { $$ = $1; }
    | f_marg_list ',' tSTAR f_norm_arg { $$ = update_list($1, $4); }
    | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list
    {
        $$ = concat_list($1, update_list($4, $6));
    }
    | f_marg_list ',' tSTAR
    {
        /* Anonymous splat: placeholder node. */
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = update_list($1, n);
    }
    | f_marg_list ',' tSTAR ',' f_marg_list
    {
        /* Anonymous splat: placeholder node. */
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = concat_list($1, update_list(n, $5));
    }
    | tSTAR f_norm_arg { $$ = $2; }
    | tSTAR f_norm_arg ',' f_marg_list { $$ = update_list($2, $4); }
    | tSTAR { $$ = alloc_node(token_object, NULL, NULL); }
    | tSTAR ',' f_marg_list
    {
        /* Anonymous splat: placeholder node. */
        struct Node *n = alloc_node(token_object, NULL, NULL);
        $$ = update_list(n, $3);
    }
;
0984
/* Trailing part of a block parameter list: keyword arguments, a keyword
 * rest argument and/or a block argument. The pieces are joined into one
 * list with concat_list / update_list. */
block_args_tail: f_block_kwarg ',' f_kwrest opt_f_block_arg
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_block_kwarg opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_kwrest opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_block_arg { $$ = $1; }
;

/* Optional ", block_args_tail"; yields 0 when absent. */
opt_block_args_tail: ',' block_args_tail { $$ = $2; }
    | /* none */ { $$ = 0; }
;
1003
/* Parameter list of a block. Each alternative is one accepted ordering of
 * required arguments (f_arg), optional arguments (f_block_optarg), a rest
 * argument (f_rest_arg) and the optional tail; the action joins the parts
 * into a single list.
 * NOTE(review): the alternatives mix concat_list, update_list and
 * create_list; those helpers are defined outside this section, so the
 * exact difference between them cannot be confirmed from here. */
block_param: f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, create_list($5, update_list($7, $8))));
    }
    | f_arg ',' f_block_optarg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_arg ',' f_block_optarg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg ',' f_rest_arg opt_block_args_tail
    {
        $$ = update_list($1, update_list($3, $4));
    }
    | f_arg ',' { $$ = $1; }
    | f_arg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, concat_list($3, update_list($5, $6)));
    }
    | f_arg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, create_list($3, update_list($5, $6)));
    }
    | f_block_optarg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_block_optarg ',' f_arg opt_block_args_tail
    {
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_rest_arg opt_block_args_tail { $$ = update_list($1, $2); }
    | f_rest_arg ',' f_arg opt_block_args_tail
    {
        $$ = create_list($1, update_list($3, $4));
    }
    | block_args_tail
;
1050
/* Optional block parameter definition. When one is present, command_start
 * is set to 1 before its value is passed on. */
opt_block_param: none
    | block_param_def
    {
        command_start = 1;
        $$ = $1;
    }
;

/* The part between the bars of a block: only block-local declarations,
 * the tOR token (no parameters at all, yields NULL), or parameters
 * followed by optional block-local declarations. */
block_param_def : '|' opt_bv_decl '|' { $$ = $2; }
    | tOR { $$ = NULL; }
    | '|' block_param opt_bv_decl '|' { $$ = update_list($2, $3); }
;

/* Optional "; bv_decls". A warning is issued when the configured
 * version is older than ruby19. */
opt_bv_decl: none
    | ';' bv_decls
    {
        if (parser->version < ruby19) {
            yywarning("Block local variables are only available in Ruby 1.9.x or higher.");
        }
        $$ = $2;
    }
;

/* Comma separated list of bvar. */
bv_decls: bvar
    | bv_decls ',' bvar { $$ = update_list($1, $3); }
;

/* One block-local variable. An f_bad_arg has already reported its error;
 * its value is released with free_ast and NULL is produced instead. */
bvar: base
    | f_bad_arg
    {
        $$ = NULL;
        free_ast($1);
    }
;
1085
/* Lambda literal. The mid-rule action saves parser->lpar_beg as the value
 * of symbol 1 and sets lpar_beg to the incremented paren_nest; the final
 * action restores the saved value and builds a token_block node from the
 * body ($3) and the argument list ($2). */
lambda:
    {
        $<num>$ = parser->lpar_beg;
        parser->lpar_beg = ++parser->paren_nest;
    }
    f_larglist lambda_body
    {
        parser->lpar_beg = $<num>1;
        $$ = alloc_node(token_block, $3, $2);
    }
;

/* Lambda argument list, with or without parentheses. Only the
 * parenthesized form accepts block-local declarations. */
f_larglist: '(' f_args opt_bv_decl rparen { $$ = update_list($2, $3); }
    | f_args
;

/* Lambda body delimited by tLAMBEG ... '}' or tDO_LAMBDA ... tEND. The
 * positions recorded for the delimiters are discarded. */
lambda_body: tLAMBEG compstmt '}'
    {
        $$ = $2;
        discard_pos(); /* } */
        discard_pos(); /* { */
    }
    | tDO_LAMBDA compstmt tEND
    {
        $$ = $2;
        discard_pos(); /* end */
    }
;

/* Block introduced by tDO_BLOCK: a token_block node built from the
 * statements ($3) and the parameters ($2); its start position is taken
 * from the position stack by pop_start. */
do_block: tDO_BLOCK opt_block_param compstmt tEND
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
;
1121
/* A command followed by a do-block (stored in the command's `cond`
 * field), optionally followed by further calls chained with '.' or
 * tCOLON2. */
block_call: command do_block { $1->cond = $2; $$ = $1; }
    | block_call '.' operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = update_list(aux, $4);
    }
    | block_call tCOLON2 operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = update_list(aux, $4);
    }
;

/* Method call forms. All but the `super` and index forms produce a
 * token_method_call node whose first child is the callee (receiver and
 * name joined by update_list when there is a receiver) and whose second
 * child is the argument list, or NULL. */
method_call: operation paren_args
    {
        $$ = alloc_node(token_method_call, $1, $2);
    }
    | primary '.' operation2 opt_paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary tCOLON2 operation2 paren_args
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, $4);
    }
    | primary tCOLON2 operation3
    {
        struct Node *aux = update_list($1, $3);
        $$ = alloc_node(token_method_call, aux, NULL);
    }
    | primary '.' paren_args
    {
        /* Call without a method name: receiver and arguments only. */
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | primary tCOLON2 paren_args
    {
        /* Call without a method name: receiver and arguments only. */
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | super paren_args { $$ = $1; $$->r = $2; }
    | super
    | primary '[' opt_call_args rbracket
    {
        /* Index access: token_array_value(receiver, arguments). */
        $$ = alloc_node(token_array_value, $1, $3);
    }
;
1169
/* Optional brace_block. */
opt_brace_block: none
    | brace_block
;

/* Block delimited by '{' ... '}' or tDO ... tEND: a token_block node built
 * from the statements ($3) and the parameters ($2); its start position is
 * taken from the position stack by pop_start. */
brace_block: '{' opt_block_param compstmt '}'
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
    | tDO opt_block_param compstmt tEND
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
;

/* One tWHEN clause: token_when built from the arguments ($2), the
 * statements ($4) and the following clauses ($5). */
case_body: tWHEN args then compstmt cases
    {
        $$ = alloc_cond(token_when, $2, $4, $5);
    }
;

/* What may follow a tWHEN clause: an optional else or another clause. */
cases: opt_else | case_body
;

/* Chain of tRESCUE clauses. Each clause is a token_rescue node holding its
 * rescue_arg ($2) and statements ($4); the next clause of the chain ($5)
 * is stored in the node's `ensure` field. */
opt_rescue: tRESCUE rescue_arg then compstmt opt_rescue
    {
        $$ = alloc_node(token_rescue, $2, $4);
        $$->ensure = $5;
    }
    | none
;

/* Exception list and variable of a rescue clause. No node is created when
 * both parts are empty. */
rescue_arg: exc_list exc_var
    {
        $$ = ($1 || $2) ? alloc_node(token_rescue_arg, $1, $2) : NULL;
    }
;

exc_list: arg | mrhs | none
;

/* Optional "tASSOC lhs"; yields the lhs. */
exc_var: none | tASSOC lhs { $$ = $2; }
;

/* Optional tENSURE clause wrapped in a token_ensure node. */
opt_ensure: none
    | tENSURE compstmt { $$ = alloc_node(token_ensure, $2, NULL); }
;

literal: numeric | symbol
;
1221
/* One or more adjacent string literals, chained with update_list. */
strings: string { $$ = $1; }
    | strings string { $$ = update_list($1, $2); }
;

/* A single literal. tCHAR gives an empty token_string node. A delimited
 * literal gives a node whose kind is read from lex_strterm->token and
 * whose first child is the contents; the terminator description
 * (lex_strterm and its word) is then freed and lex_strterm is cleared. */
string: tCHAR
    {
        $$ = alloc_node(token_string, NULL, NULL);
    }
    | tSTRING_BEG string_contents tSTRING_END
    {
        $$ = alloc_node(lex_strterm->token, $2, NULL);
        if (lex_strterm->word) {
            free(lex_strterm->word);
            lex_strterm->word = NULL;
        }
        free(lex_strterm);
        lex_strterm = NULL;
    }
;

/* Sequence of string_content. Pieces are appended with update_list once a
 * non-NULL piece has been seen; until then the newest piece is kept. */
string_contents: /* none */ { $$ = 0; }
    | string_contents string_content
    {
        if ($1 != NULL)
            $$ = update_list($1, $2);
        else
            $$ = $2;
    }
;

/* One piece of a string. Plain content produces no node. For the two
 * interpolation forms the mid-rule actions stash the current lex_strterm
 * (and, for tSTRING_DBEG, parser->cond_stack) as the values of the
 * mid-rule symbols and clear lex_strterm while the embedded code is
 * parsed; the final action restores them. */
string_content: tSTRING_CONTENT { $$ = 0; }
    | tSTRING_DBEG
    {
        lex_state = EXPR_BEG;
        $<num>$ = parser->cond_stack;
    }
    {
        $<term>$ = lex_strterm;
        lex_strterm = NULL;
    }
    compstmt '}'
    {
        parser->cond_stack = $<num>2;
        lex_strterm = $<term>3;
        $$ = $4;
        discard_pos(); /* } */
    }
    | tSTRING_DVAR
    {
        $<term>$ = lex_strterm;
        lex_strterm = NULL;
        lex_state = EXPR_BEG;
    }
    string_dvar
    {
        lex_strterm = $<term>2;
        $$ = $3;
    }
;

/* Variable that may follow tSTRING_DVAR. ALLOC_N and POP_STACK are macros
 * defined outside this section. */
string_dvar: backref
    | GLOBAL { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = global; POP_STACK; }
    | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; }
    | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; }
;

/* Symbol literal: the node produced by sym is re-tagged as token_symbol
 * and its start column is moved one to the left. */
symbol: tSYMBEG sym
    {
        $$ = $2;
        $$->kind = token_symbol;
        $$->pos.start_col--;
    }
;

/* What may follow tSYMBEG. */
sym: fname
    | strings
    | GLOBAL { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = global; POP_STACK; }
    | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; }
    | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; }
;
1302
/* Numeric literal, optionally wrapped in a token_unary_minus node. */
numeric: simple_numeric
    | tUMINUS_NUM simple_numeric %prec tLOWEST
    {
        $$ = alloc_node(token_unary_minus, $2, NULL);
    }
;

/* Unsigned numeric literal: a token_numeric node whose flags tell the
 * literal type. Rational and imaginary literals produce a warning when the
 * configured version is older than ruby21. */
simple_numeric: tINTEGER { $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = int_l; }
    | tFLOAT { $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = float_l; }
    | tRATIONAL
    {
        if (parser->version < ruby21) {
            yywarning("Rational literals are only available in Ruby 2.1.x or higher.");
        }
        $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = rational_l;
    }
    | tIMAGINARY
    {
        if (parser->version < ruby21) {
            yywarning("Imaginary literals are only available in Ruby 2.1.x or higher.");
        }
        $$ = alloc_node(token_numeric, NULL, NULL); $$->flags = imaginary_l;
    }
;

/* Any variable-like primary. Global, instance and class variables become
 * token_object nodes distinguished by their flags. */
variable: base
    | GLOBAL { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = global; POP_STACK; }
    | IVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = ivar; POP_STACK; }
    | CVAR { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = cvar; POP_STACK; }
    | const
    | other_vars
;

/* Keyword tokens that each map to a dedicated, childless node kind. */
other_vars: tNIL { $$ = alloc_node(token_nil, NULL, NULL); }
    | tSELF { $$ = alloc_node(token_self, NULL, NULL); }
    | tTRUE { $$ = alloc_node(token_true, NULL, NULL); }
    | tFALSE { $$ = alloc_node(token_false, NULL, NULL); }
    | tFILE { $$ = alloc_node(token_file, NULL, NULL); }
    | tLINE { $$ = alloc_node(token_line, NULL, NULL); }
    | tENCODING { $$ = alloc_node(token_encoding, NULL, NULL); }
;

/* tNTH_REF token as a token_object node; no flags are set here. */
backref: tNTH_REF { $$ = ALLOC_N(token_object, NULL, NULL); POP_STACK; }
;
1347
/* Optional "< expr" part, ended by a terminator. Yields the expression,
 * or NULL when there is none or when a syntax error was recovered
 * (yyerrok). The mid-rule action resets the lexer state before the
 * expression is read. */
superclass: term { $$ = NULL; }
    | '<'
    {
        lex_state = EXPR_BEG;
        command_start = 1;
    }
    expr term
    {
        $$ = $3;
    }
    | error term { yyerrok; $$ = NULL; }
;

/* Formal argument list, either parenthesized or ended by a terminator.
 * In both cases the lexer state is reset afterwards. */
f_arglist: '(' f_args rparen
    {
        $$ = $2;
        lex_state = EXPR_BEG;
        command_start = 1;
    }
    | f_args term
    {
        $$ = $1;
        lex_state = EXPR_BEG;
        command_start = 1;
    }
;
1374
/* Trailing part of a formal argument list: keyword arguments, a keyword
 * rest argument and/or a block argument. The three alternatives that
 * involve keyword arguments warn when the configured version is older
 * than ruby20. */
args_tail: f_kwarg ',' f_kwrest opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_kwarg opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = update_list($1, $2);
    }
    | f_kwrest opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = update_list($1, $2);
    }
    | f_block_arg
    {
        $$ = $1;
    }
;

/* Optional ", args_tail"; yields 0 when absent. */
opt_args_tail: ',' args_tail { $$ = $2; }
    | /* none */ { $$ = 0; }
;
1405
/* Formal arguments of a method definition or lambda. Each alternative is
 * one accepted ordering of required arguments (f_arg), optional arguments
 * (f_optarg), a rest argument (f_rest_arg) and the optional tail; the
 * action joins the parts into a single list.
 * NOTE(review): create_list is used instead of concat_list wherever the
 * rest argument is followed by further required arguments; both helpers
 * are defined outside this section, so the difference cannot be confirmed
 * from here. */
f_args: f_arg ',' f_optarg ',' f_rest_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, concat_list($5, $6)));
    }
    | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, create_list($5, concat_list($7, $8))));
    }
    | f_arg ',' f_optarg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, $4));
    }
    | f_arg ',' f_optarg ',' f_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, concat_list($5, $6)));
    }
    | f_arg ',' f_rest_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, $4));
    }
    | f_arg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, concat_list($5, $6)));
    }
    | f_arg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_optarg ',' f_rest_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, $4));
    }
    | f_optarg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        $$ = concat_list($1, create_list($3, concat_list($5, $6)));
    }
    | f_optarg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_optarg ',' f_arg opt_args_tail
    {
        $$ = concat_list($1, concat_list($3, $4));
    }
    | f_rest_arg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_rest_arg ',' f_arg opt_args_tail
    {
        $$ = create_list($1, concat_list($3, $4));
    }
    | args_tail
    | none
;
1461
/* Tokens that are not allowed as a formal argument. Each one reports an
 * error and yields 0. */
f_bad_arg: CONST { yyerror(parser, "formal argument cannot be a constant"); $$ = 0; }
    | IVAR { yyerror(parser, "formal argument cannot be an instance variable"); $$ = 0; }
    | GLOBAL { yyerror(parser, "formal argument cannot be a global variable"); $$ = 0; }
    | CVAR { yyerror(parser, "formal argument cannot be a class variable"); $$ = 0; }
;

f_norm_arg: f_bad_arg | base
;

/* One required argument: a plain one or a parenthesized f_margs. */
f_arg_item: f_norm_arg
    | tLPAREN f_margs rparen { $$ = $2; }
;

/* Comma separated list of required arguments. */
f_arg: f_arg_item
    | f_arg ',' f_arg_item { $$ = concat_list($1, $3); }
;

/* Keyword argument with a default value: token_object(label, value)
 * flagged as `label`. */
f_kw: label arg
    {
        $$ = alloc_node(token_object, $1, $2);
        $$->flags = label;
    }
;

/* Same as f_kw, but the default value is restricted to a primary. */
f_block_kw: label primary
    {
        $$ = alloc_node(token_object, $1, $2);
        $$->flags = label;
    }
;

f_block_kwarg: f_block_kw { $$ = $1; }
    | f_block_kwarg ',' f_block_kw { $$ = update_list($1, $3); }
;

f_kwarg: f_kw { $$ = $1; }
    | f_kwarg ',' f_kw { $$ = update_list($1, $3); }
;

kwrest_mark: tPOW | tDSTAR
;

/* Keyword rest argument, flagged as `kwrest`. Without a name an empty
 * token_object node stands in for it. */
f_kwrest: kwrest_mark base
    {
        $$ = $2;
        $$->flags = kwrest;
    }
    | kwrest_mark
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = kwrest;
    }
;

/* Optional argument "name = value": a token_assign node. The mid-rule
 * action records the column right after '='; the final action uses it,
 * together with the current column and lexer position, to overwrite the
 * position stored in the value node. */
f_opt: base '='
    {
        $<num>$ = parser->column;
    }
    arg
    {
        $$ = alloc_node(token_assign, $1, $4);
        $1->flags = opt; /* TODO: not sure about this */
        $4->pos.start_col = $<num>3;
        $4->pos.end_col = parser->column;
        $4->pos.offset = parser->lex_prev - parser->blob;
    }
;

/* Optional argument of a block; unlike f_opt no flags or positions are
 * adjusted. */
f_block_opt: base '=' primary { $$ = alloc_node(token_assign, $1, $3); }
;

f_block_optarg: f_block_opt
    | f_block_optarg ',' f_block_opt { $$ = update_list($1, $3); }
;

f_optarg: f_opt
    | f_optarg ',' f_opt { $$ = update_list($1, $3); }
;

restarg_mark: '*' | tSTAR
;

/* Rest argument. Without a name an empty token_object node stands in.
 * NOTE(review): flags is set to `kwrest`, the same value f_kwrest uses;
 * confirm that a plain rest argument is meant to share that flag. */
f_rest_arg: restarg_mark base { $$ = $2; $$->flags = kwrest; }
    | restarg_mark { $$ = alloc_node(token_object, NULL, NULL); $$->flags = kwrest; }
;

blkarg_mark: '&' | tAMPER
;

/* Block argument, flagged as `block`. */
f_block_arg: blkarg_mark base { $$ = $2; $$->flags = block; }
;

opt_f_block_arg : ',' f_block_arg { $$ = $2; }
    | none
;
1557
/* Singleton part of a definition: a variable or a parenthesized
 * expression. An empty expression and the listed literal kinds are
 * reported as errors; the expression is still passed on as the value. */
singleton: variable { $$ = $1; }
    | '(' { lex_state = EXPR_BEG; } expr rparen
    {
        if ($3 == 0)
            yyerror(parser, "can't define singleton method for ().");
        else {
            /* Other node kinds are accepted silently. */
            switch ($3->kind) {
                case token_string:
                case token_regexp:
                case token_numeric:
                case token_symbol:
                case token_array:
                    yyerror(parser, "can't define singleton method for literals");
            }
        }
        $$ = $3;
    }
;

/* CONST token as a token_object node flagged `constant`. */
const: CONST { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = constant; POP_STACK; }
;

/* BASE token as a token_object node flagged `var`. */
base: BASE { $$ = ALLOC_N(token_object, NULL, NULL); $$->flags = var; POP_STACK; }
;
1582
/* Possibly empty list of associations, with an optional trailer. */
assoc_list: none
    | assocs trailer { $$ = $1; }
;

assocs: assoc
    | assocs ',' assoc { $$ = update_list($1, $3); }
;

/* One association. "key tASSOC value" and "label value" both give a
 * token_object(key, value) node; "tDSTAR value" gives the value itself.
 * The last two forms warn when the configured version is too old. */
assoc: arg tASSOC arg
    {
        $$ = alloc_node(token_object, $1, $3);
    }
    | label arg
    {
        if (parser->version < ruby19) {
            yywarning("This syntax is only available in Ruby 1.9.x or higher.");
        }
        $$ = alloc_node(token_object, $1, $2);
    }
    | tDSTAR arg
    {
        if (parser->version < ruby20) {
            yywarning("tDSTAR token is only available in Ruby 2.0.x or higher.");
        }
        $$ = $2;
    }
;

operation: base | const
;

/* Like operation, but an `op` is accepted as well; it becomes a
 * token_object node named after parser->aux. */
operation2: base
    | const
    | op
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->name = parser->aux;
    }
;

/* Like operation2 without the const alternative. */
operation3: base
    | op
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->name = parser->aux;
    }
;

/* tKEY token as a token_symbol node. */
label: tKEY { $$ = ALLOC_N(token_symbol, NULL, NULL); POP_STACK; }
;

super: tSUPER { $$ = alloc_node(token_super, NULL, NULL); }
;
1636
/* Punctuation helpers. None of these rules builds a node. */
dot_or_colon: '.' | tCOLON2
;

opt_terms: /* none */ | terms
;

opt_eol: /* none */ | '\n'
;

/* Closing parenthesis, optionally preceded by a newline. */
rparen: opt_eol ')'
;

/* Closing bracket, optionally preceded by a newline. */
rbracket: opt_eol ']'
;

trailer: opt_eol | ','
;

/* Statement terminator. A ';' also calls yyerrok. */
term: ';' {yyerrok;} | '\n'
;

terms: term | terms ';' {yyerrok;}
;

/* Empty rule whose value is NULL. */
none: /* none */ { $$ = NULL; }
;
1663
1664 %%
1665 #undef parser
1666 #undef yylex
1667
1668 #include <ctype.h>
1669 #include "hash.c"
1670
1671
/* Let's define some useful macros :D */

/* Evaluates c and discards the value. Note that the expansion already ends
 * with a semicolon. */
#define _unused_(c) (void) c;
/* True when the five bytes following c spell "begin"; c itself is not
 * inspected. */
#define multiline_comment(c) (*(c+1) == 'b' && *(c+2) == 'e' && *(c+3) == 'g' && *(c+4) == 'i' && *(c+5) == 'n')
/* True when c points to the text "=end". */
#define multiline_end(c) (*c == '=' && *(c+1) == 'e' && *(c+2) == 'n' && *(c+3) == 'd')
/* True when c points to something that can be part of an identifier. */
#define not_sep(c) (is_valid_identifier(c) || is_utf8_digit(c) || *c == '_')
/* True for a space or a tab. The argument is a character, not a pointer. */
#define is_blank(c) (c == ' ' || c == '\t')
/* Exchange a and b using aux as scratch storage. */
#define SWAP(a, b, aux) { aux = a; a = b; b = aux; }
/* True when buffer is longer than 4 characters and both starts and ends
 * with two underscores. */
#define is_special_method(buffer) ((strlen(buffer) > 4) && buffer[0] == '_' && \
    buffer[1] == '_' && buffer[strlen(buffer) - 2] == '_' && \
    buffer[strlen(buffer) - 1] == '_')
/* True when the read pointer is at or past the end of the input. Like the
 * macros below, it expects a variable called `parser` in scope. */
#define IS_EOF() ((unsigned int) (parser->lex_p - parser->blob) >= parser->length)
/* Shorthands to test the current lexer state against a group of states. */
#define IS_ARG() IS_lex_state(EXPR_ARG_ANY)
#define IS_END() IS_lex_state(EXPR_END_ANY)
#define IS_BEG() IS_lex_state(EXPR_BEG_ANY)
/* The following three also expect locals named space_seen / cmd_state at
 * the place where they are expanded. */
#define IS_SPCARG(c) (IS_ARG() && space_seen && !isspace(c))
#define IS_LABEL_POSSIBLE() ((IS_lex_state(EXPR_BEG | EXPR_ENDFN) && !cmd_state) || IS_ARG())
/* True when the next character is a ':' that is not followed by another. */
#define IS_LABEL_SUFFIX() (*parser->lex_p == ':' && *(parser->lex_p + 1) != ':')
#define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)
1691
1692
1693 /* Initialize the parser */
1694 static void init_parser(struct parser_t *parser)
1695 {
1696 parser->content_given = 0;
1697 parser->ast = NULL;
1698 parser->blob = NULL;
1699 parser->lex_p = NULL;
1700 parser->lex_prev = NULL;
1701 parser->lex_prevc = 0;
1702 parser->lex_pend = NULL;
1703 parser->line_pend = 0;
1704 parser->column_pend = 0;
1705 parser->here_found = 0;
1706 parser->eof_reached = 0;
1707 parser->cond_stack = 0;
1708 parser->cmdarg_stack = 0;
1709 parser->in_def = 0;
1710 parser->lpar_beg = 0;
1711 parser->paren_nest = 0;
1712 parser->sp = 0;
1713 parser->line = 1;
1714 parser->column = 0;
1715 parser->pos_stack = (struct pos_t *) malloc(SSIZE * sizeof(struct pos_t));
1716 parser->stack_scale = 0;
1717 parser->pos_size = 0;
1718 parser->errors = NULL;
1719 parser->last_error = NULL;
1720 parser->warning = 0;
1721 parser->unrecoverable = 0;
1722 parser->last_comment.comment = NULL;
1723 parser->last_comment.line = 0;
1724 parser->comment_index = 0;
1725 command_start = 1;
1726 lex_strterm = NULL;
1727 lex_state = EXPR_BEG;
1728 }
1729
1730 /* Free the parser */
1731 static void free_parser(struct parser_t *parser)
1732 {
1733 int index;
1734
1735 for (index = 0; index < parser->sp; index++)
1736 free(parser->stack[index]);
1737 if (parser->pos_stack != NULL)
1738 free(parser->pos_stack);
1739 if (lex_strterm && lex_strterm->word)
1740 free(lex_strterm->word);
1741 if (parser->last_comment.comment)
1742 free(parser->last_comment.comment);
1743 if (!parser->content_given)
1744 free(parser->blob);
1745 }
1746
1747 /* Read the file's source code and allocate it for further inspection. */
1748 static int retrieve_source(struct parser_t *p, const char *path)
1749 {
1750 int length;
1751
1752 /* Open specified file */
1753 FILE *fd = fopen(path, "r");
1754 if (!fd) {
1755 fprintf(stderr, "Cannot open file: %s\n", path);
1756 return 0;
1757 }
1758
1759 fseek(fd, 0, SEEK_END);
1760 length = ftell(fd);
1761 fseek(fd, 0, SEEK_SET);
1762
1763 if (!length)
1764 return 0;
1765 p->blob = (char *) malloc(sizeof(char) * length);
1766
1767 if (!p->blob) {
1768 fprintf(stderr, "Cannot store contents\n");
1769 return 0;
1770 }
1771 fread(p->blob, length, 1, fd);
1772 if (ferror(fd)) {
1773 fprintf(stderr, "Reading error\n");
1774 return 0;
1775 }
1776 p->length = length;
1777 p->lex_p = p->blob;
1778 fclose(fd);
1779 return 1;
1780 }
1781
/*
 * Some macros to make easier the UTF-8 support
 */
/* True unless c is a UTF-8 continuation byte (bit pattern 10xxxxxx). */
#define is_utf(c) (((c) & 0xC0) != 0x80)
/* True when the character that starts at c takes more than one byte. */
#define is_special(c) (utf8_charsize(c) > 1)
/* True when c points to a character that may be part of an identifier. */
#define is_identchar(c) (is_utf8_alnum(c) || *(c) == '_')

/*
 * Return the number of bytes taken by the first character of the UTF-8
 * encoded string s: the first byte plus every continuation byte that
 * directly follows it. The result is 1 for plain ASCII characters and
 * greater than 1 otherwise. For an empty string the result is also 1, and
 * nothing beyond the terminator is read.
 */
static int utf8_charsize(const char *s)
{
    int size = 1;

    if (s[0] == '\0')
        return 1;
    while (s[size] && !is_utf(s[size]))
        size++;
    return size;
}
1807
/*
 * Tell whether the character that starts at str is alphabetic. Characters
 * that take more than one byte always count as alphabetic.
 */
static int is_utf8_alpha(const char *str)
{
    if (is_special(str))
        return 1;
    /* <ctype.h> wants a value in the range of unsigned char. */
    return isalpha((unsigned char) *str);
}
1812
/*
 * Tell whether the character that starts at str is alphanumeric.
 * Characters that take more than one byte always count as alphanumeric.
 */
static int is_utf8_alnum(const char *str)
{
    if (is_special(str))
        return 1;
    /* <ctype.h> wants a value in the range of unsigned char. */
    return isalnum((unsigned char) *str);
}
1817
/*
 * Tell whether the character that starts at str is printable and not a
 * space. Characters that take more than one byte always qualify.
 */
static int is_utf8_graph(const char *str)
{
    if (is_special(str))
        return 1;
    /* <ctype.h> wants a value in the range of unsigned char. */
    return isgraph((unsigned char) *str);
}
1822
/*
 * Tell whether the character that starts at str is a decimal digit.
 * Characters that take more than one byte are never digits.
 */
static int is_utf8_digit(const char *str)
{
    if (is_special(str))
        return 0;
    /* <ctype.h> wants a value in the range of unsigned char. */
    return isdigit((unsigned char) *str);
}
1827
/*
 * Return 1 if c points to the beginning of a valid identifier, 0
 * otherwise. Accepted beginnings are: an alphabetic character, a '$'
 * followed by a printable character that is not a digit, a '_' or '@'
 * followed by an alphabetic character, and "@@" followed by an alphabetic
 * character or a '_'.
 */
static int is_valid_identifier(const char *c)
{
    if (is_utf8_alpha(c))
        return 1;

    switch (*c) {
    case '$':
        return (is_utf8_graph(c + 1) && !is_utf8_digit(c + 1)) ? 1 : 0;
    case '_':
        return is_utf8_alpha(c + 1) ? 1 : 0;
    case '@':
        if (is_utf8_alpha(c + 1))
            return 1;
        if (*(c + 1) != '@')
            return 0;
        return (is_utf8_alpha(c + 2) || *(c + 2) == '_') ? 1 : 0;
    default:
        return 0;
    }
}
1841
1842 /* Get the next character and move the lexer forward. */
1843 static int parser_nextc(struct parser_t *parser)
1844 {
1845 int c;
1846
1847 if (parser->eof_reached || IS_EOF())
1848 return -1;
1849
1850 parser->lex_prev = parser->lex_p;
1851 parser->lex_prevc = parser->column;
1852 c = (unsigned char) *parser->lex_p++;
1853 if (c == '\n') {
1854 if (parser->here_found) {
1855 parser->line = parser->line_pend;
1856 parser->column = parser->column_pend;
1857 parser->lex_p = parser->lex_pend + 1;
1858 parser->here_found = 0;
1859 }
1860 parser->line++;
1861 parser->column = -1;
1862 }
1863 parser->column++;
1864 return c;
1865 }
1866 #define nextc() parser_nextc(parser)
1867
1868 /* Move the lexer backwards. */
1869 static void parser_pushback(struct parser_t *parser)
1870 {
1871 parser->column--;
1872 parser->lex_p--;
1873 if (*parser->lex_p == '\n') {
1874 parser->line--;
1875 parser->column = parser->lex_prevc;
1876 }
1877 }
1878 #define pushback() parser_pushback(parser)
1879
1880 /* It parses a heredoc identifier and sets a new lex_strterm */
1881 static int parse_heredoc_identifier(struct parser_t *parser)
1882 {
1883 char *buffer, *ptr;
1884 int count = SSIZE, scale = 0;
1885 char c = nextc();
1886 unsigned char quote_seen = 0, term = ' ';
1887 unsigned char dash_seen = 0;
1888
1889 /* Check for <<- case */
1890 if (c == '-') {
1891 dash_seen = 1;
1892 c = nextc();
1893 }
1894 /* And now surrounding quotes */
1895 if (c == '\'' || c == '"' || c == '`') {
1896 term = c;
1897 c = nextc();
1898 quote_seen = 1;
1899 }
1900 if (!quote_seen && !is_identchar(parser->lex_prev)) {
1901 if (dash_seen)
1902 pushback();
1903 return 0;
1904 }
1905
1906 buffer = (char *) malloc(SSIZE * sizeof(char));
1907 ptr = buffer;
1908 for (;;) {
1909 /* If quote was seen, anything except the term is accepted */
1910 if (quote_seen) {
1911 if (c == term || !is_utf8_graph(parser->lex_prev))
1912 break;
1913 } else if (!is_identchar(parser->lex_prev))
1914 break;
1915 if (!count) {
1916 scale++;
1917 buffer = (char *) realloc(buffer, (SSIZE << scale) * sizeof(char));
1918 }
1919 *ptr++ = c;
1920 c = nextc();
1921 if (c < 0) {
1922 free(buffer);
1923 yyerror(parser, "unterminated here document identifier");
1924 return 0;
1925 }
1926 }
1927 *ptr = '\0';
1928 pushback();
1929
1930 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
1931 lex_strterm->term = 1;
1932 lex_strterm->can_embed = dash_seen;
1933 lex_strterm->word = buffer;
1934 lex_strterm->token = token_heredoc;
1935 lex_strterm->nestable = 0;
1936 lex_strterm->paren = 0;
1937 parser->lex_pend = parser->lex_p + quote_seen;
1938 parser->line_pend = parser->line;
1939 parser->column_pend = parser->column;
1940 return 1;
1941 }
1942
1943 /* Let's parse a heredoc */
1944 static int parse_heredoc(struct parser_t *parser)
1945 {
1946 int length = strlen(lex_strterm->word);
1947 char *aux = (char*)alloca(length);
1948 char c = nextc();
1949 int i = 0;
1950 int ax = 0;
1951
1952 /* Skip until next line */
1953 while (c != -1 && c != '\n')
1954 c = nextc();
1955
1956 do {
1957 c = nextc();
1958
1959 /* Ignore initial spaces if dash seen */
1960 if (i == 0 && lex_strterm->can_embed)
1961 while (isspace(c) || c == '\n')
1962 c = nextc();
1963 if (c == '#' && *(parser->lex_prev - 1) != '\\') {
1964 c = nextc();
1965 switch (c) {
1966 case '$': case '@':
1967 parser->column -= ax;
1968 pushback();
1969 return tSTRING_DVAR;
1970 case '{':
1971 parser->column -= ax;
1972 command_start = 1;
1973 return tSTRING_DBEG;
1974 }
1975 }
1976 aux[i] = c;
1977 if (c == '\n') {
1978 if ((length == i) && !strncmp(lex_strterm->word, aux, i)) {
1979 pushback();
1980 return tSTRING_END;
1981 }
1982 i = -1;
1983 } else
1984 ax += utf8_charsize(parser->lex_prev) - 1;
1985 if (i >= length)
1986 i = -1;
1987 i++;
1988 } while (c != -1);
1989
1990 parser->eof_reached = 1;
1991 if (lex_strterm->word) {
1992 free(lex_strterm->word);
1993 lex_strterm->word = NULL;
1994 }
1995 free(lex_strterm);
1996 lex_strterm = NULL;
1997 return token_invalid;
1998 }
1999
/*
 * Return the character that closes the delimiter c: the matching bracket
 * for '[', '(', '<' and '{', and c itself for anything else.
 */
static char closing_char(char c)
{
    static const char pairs[4][2] = {
        { '[', ']' },
        { '(', ')' },
        { '<', '>' },
        { '{', '}' }
    };
    unsigned int k;

    for (k = 0; k < sizeof(pairs) / sizeof(pairs[0]); k++) {
        if (pairs[k][0] == c)
            return pairs[k][1];
    }
    return c;
}
2011
2012 /* Guess the token kind of the shortcut based on the given character */
2013 static int guess_kind(struct parser_t *parser, char c)
2014 {
2015 if (!isalpha(c))
2016 return token_string;
2017
2018 switch (c) {
2019 case 'Q': case 'q': case 'x': return token_string;
2020 case 'I': case 'i':
2021 if (parser->version < ruby20) {
2022 yywarning("This shortcut is only available in Ruby 2.0.x or higher.");
2023 }
2024 case 'W': case 'w': return token_array;
2025 case 's': return token_symbol;
2026 case 'r': return token_regexp;
2027 default:
2028 yyerror(parser, "unknown type of %string");
2029 return 0;
2030 }
2031 }
2032
2033 /* Push name to the stack */
2034 static void push_stack(struct parser_t *parser, const char *buf)
2035 {
2036 parser->stack[parser->sp] = strdup(buf);
2037 parser->sp++;
2038 }
2039
2040 /* Pop name from the stack. */
2041 static void pop_stack(struct parser_t *parser, struct Node *n)
2042 {
2043 if (n != NULL)
2044 n->name = parser->stack[0];
2045 parser->stack[0] = parser->stack[1];
2046 parser->stack[1] = NULL;
2047 parser->sp--;
2048 }
2049
2050 /* Push a position into the stack of positions */
2051 static void push_pos(struct parser_t *parser, struct pos_t tokp)
2052 {
2053 int scale = SSIZE * parser->stack_scale;
2054
2055 parser->pos_size++;
2056 if (parser->pos_size > SSIZE) {
2057 parser->pos_size = 1;
2058 parser->stack_scale++;
2059 scale += SSIZE;
2060 parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t));
2061 }
2062 parser->pos_stack[parser->pos_size + scale - 1] = tokp;
2063 }
2064
2065 /* Pop a position from the stack of positions and assign to the given node */
2066 static void pop_pos(struct parser_t *parser, struct Node *n)
2067 {
2068 int scale = SSIZE * parser->stack_scale;
2069 int pos = parser->pos_size - 1 + scale;
2070 struct pos_t tokp = parser->pos_stack[pos];
2071
2072 if (n != NULL) {
2073 n->pos.start_line = tokp.start_line;
2074 n->pos.start_col = tokp.start_col;
2075 n->pos.end_line = tokp.end_line;
2076 n->pos.end_col = tokp.end_col;
2077 n->pos.offset = tokp.offset;
2078 }
2079 parser->pos_size--;
2080 if (parser->pos_size == 0 && parser->stack_scale > 0) {
2081 parser->stack_scale--;
2082 parser->pos_size = SSIZE;
2083 scale -= SSIZE;
2084 parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t));
2085 }
2086 }
2087
2088 /* Like pop_pos but it just copies the start position to the given node */
2089 static void pop_start(struct parser_t *parser, struct Node *n)
2090 {
2091 n->pos.start_line = parser->pos_stack[parser->pos_size - 1].start_line;
2092 n->pos.start_col = parser->pos_stack[parser->pos_size - 1].start_col;
2093 pop_pos(parser, NULL);
2094 }
2095
2096 /* Like pop_pos but it just copies the end position to the given node */
2097 static void pop_end(struct parser_t *parser, struct Node *n)
2098 {
2099 n->pos.end_line = parser->pos_stack[parser->pos_size - 1].start_line;
2100 n->pos.end_col = parser->pos_stack[parser->pos_size - 1].start_col;
2101 pop_pos(parser, NULL);
2102 }
2103
2104 /* Push the last comment that we've found to the stack of comments. */
2105 static void push_last_comment(struct parser_t *parser)
2106 {
2107 if ((parser->line - parser->last_comment.line) < 2)
2108 parser->comment_stack[parser->comment_index] = parser->last_comment.comment;
2109 else {
2110 parser->comment_stack[parser->comment_index] = NULL;
2111 if (parser->last_comment.comment)
2112 free(parser->last_comment.comment);
2113 }
2114 parser->comment_index++;
2115 parser->last_comment.comment = NULL;
2116 }
2117
2118 /* Pop a comment from the stack of comments and assign it to the given node */
2119 static void pop_comment(struct parser_t *parser, struct Node *n)
2120 {
2121 if (parser->comment_index > 0) {
2122 parser->comment_index--;
2123 n->comment = parser->comment_stack[parser->comment_index];
2124 }
2125 }
2126
/*
 * Helper for code that fills a heap buffer byte by byte. It expects three
 * variables at the place where it is expanded: `buffer` (the allocation),
 * `count` (the write index) and `scale` (how many times it has fired).
 * Once count exceeds N, scale is increased and buffer is reallocated to
 * scale * 1024 bytes.
 * NOTE(review): count is reset to 0 at that point, so the next write goes
 * to the beginning of the buffer again instead of continuing after the
 * data already stored; the result of realloc is not checked either.
 */
#define __check_buffer_size(N) { \
    if (count > N) { \
        count = 0; \
        scale++; \
        buffer = (char *) realloc(buffer, scale * 1024); \
    } \
}
2134
2135 /* Store the given comment as the last comment seen */
2136 static void store_comment(struct parser_t *parser, char *comment)
2137 {
2138 if (parser->last_comment.comment != NULL)
2139 free(parser->last_comment.comment);
2140 parser->last_comment.comment = comment;
2141 parser->last_comment.line = parser->line;
2142 }
2143
2144 /* Check if the given parameter points to an indented comment */
2145 static int is_indented_comment(struct parser_t *parser)
2146 {
2147 char *c = parser->lex_prev;
2148 char *original = c;
2149
2150 for (; *c == ' ' || *c == '\t'; ++c);
2151 parser->lex_p += (c - original);
2152 parser->column += (c - original);
2153 return (*c == '#');
2154 }
2155
2156 /* Read a comment and store it if possible */
2157 static void set_comment(struct parser_t *parser)
2158 {
2159 int c = ' ', count = 0, scale = 0;
2160 char *buffer = (char *) malloc(LSIZE);
2161
2162 for (;; ++count) {
2163 if (c != '#' && !is_indented_comment(parser))
2164 break;
2165 c = *(parser->lex_p - 1);
2166 while (c == '#' && c != -1)
2167 c = nextc();
2168 if (c != '\n') {
2169 for (; c != -1; count++) {
2170 __check_buffer_size(1000);
2171 buffer[count] = c;
2172 c = nextc();
2173 if (c == '\n') {
2174 buffer[++count] = c;
2175 break;
2176 }
2177 }
2178 } else
2179 buffer[count] = c;
2180 c = nextc();
2181 }
2182
2183 if (c != -1)
2184 pushback();
2185 buffer[count] = '\0';
2186 store_comment(parser, buffer);
2187 }
2188
2189 /* Parse a string or a regexp */
2190 static int parse_string(struct parser_t *parser)
2191 {
2192 register int c = *parser->lex_p;
2193 int next = *(parser->lex_p + 1);
2194
2195 if (c == '\\' && (next == '\\' || next == lex_strterm->term || next == lex_strterm->paren)) {
2196 parser->lex_p += 2;
2197 parser->column += 2;
2198 return tSTRING_CONTENT;
2199 }
2200
2201 if (c == lex_strterm->term) {
2202 nextc();
2203 if (lex_strterm->nestable) {
2204 lex_strterm->nest--;
2205 if (lex_strterm->nest > 0)
2206 return tSTRING_CONTENT;
2207 }
2208 return tSTRING_END;
2209 } else if (lex_strterm->nestable && lex_strterm->paren == c) {
2210 lex_strterm->nest++;
2211 nextc();
2212 return tSTRING_CONTENT;
2213 }
2214
2215 if (IS_EOF()) {
2216 parser->eof_reached = 1;
2217 yyerror(parser, "unterminated string meets end of file");
2218 free(lex_strterm);
2219 lex_strterm = NULL;
2220 return token_invalid;
2221 }
2222
2223 if (lex_strterm->can_embed && c == '#' && *(parser->lex_prev) != '\\') {
2224 nextc();
2225 switch (*parser->lex_p) {
2226 case '$': case '@':
2227 return tSTRING_DVAR;
2228 case '{':
2229 c = nextc();
2230 command_start = 1;
2231 return tSTRING_DBEG;
2232 }
2233 pushback();
2234 }
2235
2236 /* Re-using the next and the c variables */
2237 next = utf8_charsize(parser->lex_p);
2238 c = next - 1;
2239 while (next-- > 0) {
2240 if (nextc() < 0) {
2241 parser->eof_reached = 1;
2242 free(lex_strterm);
2243 lex_strterm = NULL;
2244 return token_invalid;
2245 }
2246 }
2247 parser->column -= c;
2248 return tSTRING_CONTENT;
2249 }
2250
2251 /* Regular expressions can end with some options, read them */
2252 static void parse_re_options(struct parser_t *parser)
2253 {
2254 char aux[64];
2255 int c = *parser->lex_p;
2256
2257 while (isalpha(c)) {
2258 if (c != 'i' && c != 'm' && c != 'x' && c != 'o' &&
2259 c != 'u' && c != 'e' && c != 's' && c != 'n') {
2260 sprintf(aux, "unknown regexp option - %c", c);
2261 yyerror(parser, aux);
2262 return;
2263 }
2264 c = nextc();
2265 }
2266 pushback();
2267 }
2268
/*
 * Standard warning for ambiguous arguments.
 *
 * NOTE(review): `parser` is not referenced directly in the body; presumably
 * the yywarning() macro picks it up - confirm against its definition.
 */
static void arg_ambiguous_gen(struct parser_t *parser)
{
    yywarning("ambiguous first argument; put parentheses or even spaces");
}
/*
 * Expression form of the above: emits the warning and evaluates to 1, so it
 * can be used as an operand of `&&`. Needs a `parser` variable in scope.
 */
#define arg_ambiguous() (arg_ambiguous_gen(parser), 1)
2275
2276 /*
2277 * This is the lexer. It reads the source code (blob) and provides tokens to
2278 * the parser. It also updates the necessary flags.
2279 */
2280 static int parser_yylex(struct parser_t *parser)
2281 {
2282 register int c;
2283 int bc = 0;
2284 char *cp;
2285 char lexbuf[SSIZE];
2286 unsigned char space_seen = 0;
2287 int cmd_state;
2288 struct pos_t tokp = {-1, -1, -1, -1, 0};
2289
2290 /* Check for string terminations: string, regexp, heredoc, shortcut */
2291 if (lex_strterm) {
2292 if (lex_strterm->token == token_heredoc) {
2293 c = parse_heredoc(parser);
2294 if (c == tSTRING_END) {
2295 tokp.end_line = parser->line;
2296 tokp.end_col = parser->column;
2297 SWAP(parser->line, parser->line_pend, bc);
2298 SWAP(parser->column, parser->column_pend, bc);
2299 SWAP(parser->lex_p, parser->lex_pend, cp);
2300 parser->here_found = 1;
2301 lex_state = EXPR_END;
2302
2303 }
2304 } else {
2305 c = parse_string(parser);
2306 if (c == tSTRING_END) {
2307 if (lex_strterm->token == token_regexp && isalpha(*parser->lex_p))
2308 parse_re_options(parser);
2309 lex_state = EXPR_END;
2310 }
2311 }
2312 return c;
2313 }
2314
2315 cmd_state = command_start;
2316 command_start = 0;
2317 retry:
2318 c = nextc();
2319
2320 tokp.start_line = parser->line;
2321 tokp.start_col = parser->column - 1;
2322
2323 /* Check numeric values here instead of entering the main switch */
2324 if (isdigit(c)) {
2325 cp = lexbuf;
2326 goto tnum;
2327 }
2328
2329 switch (c) {
2330 case '\0': /* NULL */
2331 case EOF: /* end of script */
2332 parser->eof_reached = 1;
2333 return token_invalid;
2334
2335 /* white spaces */
2336 case ' ': case '\t': case '\f': case '\r':
2337 case '\13': /* vertical tab */
2338 space_seen = 1;
2339 goto retry;
2340 case '#':
2341 set_comment(parser);
2342 case '\n':
2343 if (IS_lex_state(EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT))
2344 goto retry;
2345 CMDARG_PUSH(0);
2346 lex_state = EXPR_BEG;
2347 command_start = 1;
2348 return '\n';
2349 case '=':
2350 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2351 bc = nextc();
2352 if (bc == '=') {
2353 if (nextc() == '=')
2354 return tEQQ;
2355 pushback();
2356 return tEQ;
2357 }
2358 if (bc == '~')
2359 return tMATCH;
2360 if (bc == '>')
2361 return tASSOC;
2362 if (multiline_comment(parser->lex_prev - 1)) {
2363 parser->column += 4;
2364 parser->lex_p += 4;
2365 while (!multiline_end(parser->lex_prev))
2366 nextc();
2367 parser->column += 3;
2368 parser->lex_p += 3;
2369 bc = 0;
2370 goto retry;
2371 }
2372 break;
2373 case '[':
2374 parser->paren_nest++;
2375 if (IS_AFTER_OPERATOR()) {
2376 lex_state = EXPR_ARG;
2377 bc = nextc();
2378 if (bc == ']') {
2379 if (nextc() == '=')
2380 return tASET;
2381 c = tAREF;
2382 }
2383 break;
2384 } else if (IS_BEG())
2385 c = tLBRACKET;
2386 else if (IS_ARG() && space_seen)
2387 c = tLBRACKET;
2388 lex_state = EXPR_BEG;
2389 COND_PUSH(0);
2390 CMDARG_PUSH(0);
2391 return c;
2392 case ']':
2393 parser->paren_nest--;
2394 lex_state = EXPR_ENDARG;
2395 CMDARG_LEXPOP();
2396 COND_LEXPOP();
2397 return c;
2398 case '<':
2399 bc = nextc();
2400 if (bc == '<' && !IS_lex_state(EXPR_DOT | EXPR_CLASS) &&
2401 !IS_END() && (!IS_ARG() || space_seen)) {
2402 if (parse_heredoc_identifier(parser))
2403 return tSTRING_BEG;
2404 pushback();
2405 }
2406 if (IS_AFTER_OPERATOR())
2407 lex_state = EXPR_ARG;
2408 else {
2409 if (IS_lex_state(EXPR_CLASS))
2410 command_start = 1;
2411 lex_state = EXPR_BEG;
2412 }
2413 if (bc == '=') {
2414 if (nextc() == '>')
2415 return tCMP;
2416 pushback();
2417 return tLEQ;
2418 }
2419 if (bc == '<') {
2420 if (nextc() == '=') {
2421 lex_state = EXPR_BEG;
2422 return tOP_ASGN;
2423 }
2424 c = tLSHIFT;
2425 }
2426 break;
2427 case '>':
2428 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2429 bc = nextc();
2430 if (bc == '=')
2431 return tGEQ;
2432 if (bc == '>') {
2433 if (nextc() == '=') {
2434 lex_state = EXPR_BEG;
2435 return tOP_ASGN;
2436 }
2437 c = tRSHIFT;
2438 }
2439 break;
2440 case '!':
2441 bc = nextc();
2442 if (IS_AFTER_OPERATOR()) {
2443 lex_state = EXPR_ARG;
2444 if (bc == '@')
2445 return '!';
2446 } else
2447 lex_state = EXPR_BEG;
2448 if (bc == '=')
2449 return tNEQ;
2450 if (bc == '~')
2451 return tNMATCH;
2452 break;
2453 case '+':
2454 bc = nextc();
2455 if (IS_AFTER_OPERATOR()) {
2456 lex_state = EXPR_ARG;
2457 if (bc == '@')
2458 return tUPLUS;
2459 pushback();
2460 return '+';
2461 }
2462 if (bc == '=') {
2463 lex_state = EXPR_BEG;
2464 return tOP_ASGN;
2465 }
2466 if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) {
2467 lex_state = EXPR_BEG;
2468 pushback();
2469 return tUPLUS;
2470 }
2471 lex_state = EXPR_BEG;
2472 break;
2473 case '-':
2474 bc = nextc();
2475 if (IS_AFTER_OPERATOR()) {
2476 lex_state = EXPR_ARG;
2477 if (bc == '@')
2478 return tUMINUS;
2479 pushback();
2480 return '-';
2481 }
2482 if (bc == '=') {
2483 lex_state = EXPR_BEG;
2484 return tOP_ASGN;
2485 }
2486 if (bc == '>') {
2487 lex_state = EXPR_ENDFN;
2488 return tLAMBDA;
2489 }
2490 if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) {
2491 lex_state = EXPR_BEG;
2492 pushback();
2493 if (bc != -1 && isdigit(bc))
2494 return tUMINUS_NUM;
2495 return tUMINUS;
2496 }
2497 lex_state = EXPR_BEG;
2498 break;
2499 case '*':
2500 bc = nextc();
2501 if (bc == '=') {
2502 lex_state = EXPR_BEG;
2503 return tOP_ASGN;
2504 }
2505 if (bc == '*') {
2506 bc = nextc();
2507 if (bc == '=') {
2508 lex_state = EXPR_BEG;
2509 return tOP_ASGN;
2510 }
2511 pushback();
2512 if (IS_SPCARG(bc)) {
2513 yywarning("`**' interpreted as argument prefix");
2514 c = tDSTAR;
2515 } else if (IS_BEG())
2516 c = tDSTAR;
2517 else
2518 c = tPOW;
2519 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2520 return c;
2521 }
2522 if (IS_SPCARG(bc)) {
2523 yywarning("`*' interpreted as argument prefix");
2524 c = tSTAR;
2525 } else if (IS_BEG())
2526 c = tSTAR;
2527 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2528 break;
2529 case '/':
2530 if (IS_lex_state(EXPR_BEG_ANY)) {
2531 regexp:
2532 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2533 lex_strterm->term = c;
2534 lex_strterm->can_embed = 1;
2535 lex_strterm->token = token_regexp;
2536 lex_strterm->word = NULL;
2537 lex_strterm->nestable = 0;
2538 lex_strterm->paren = 0;
2539 return tSTRING_BEG;
2540 }
2541 bc = nextc();
2542 if (bc == '=') {
2543 lex_state = EXPR_BEG;
2544 return tOP_ASGN;
2545 }
2546 pushback();
2547 if (IS_SPCARG(bc)) {
2548 arg_ambiguous_gen(parser);
2549 goto regexp;
2550 }
2551 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2552 return c;
2553 case '%':
2554 bc = nextc();
2555 if (IS_lex_state(EXPR_BEG_ANY) || IS_SPCARG(bc)) {
2556 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2557 lex_strterm->token = guess_kind(parser, bc);
2558 if (isalpha(bc))
2559 bc = nextc();
2560 lex_strterm->term = closing_char(bc);
2561 lex_strterm->can_embed = 1;
2562 lex_strterm->word = NULL;
2563 lex_strterm->paren = bc;
2564 lex_strterm->nestable = (bc != lex_strterm->term);
2565 lex_strterm->nest = 1;
2566 return tSTRING_BEG;
2567 }
2568 if (bc == '=') {
2569 lex_state = EXPR_BEG;
2570 return tOP_ASGN;
2571 }
2572 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2573 break;
2574 case '&':
2575 bc = nextc();
2576 if (bc == '&') {
2577 lex_state = EXPR_BEG;
2578 if (nextc() == '=')
2579 return tOP_ASGN;
2580 pushback();
2581 return tAND;
2582 }
2583 if (bc == '=') {
2584 lex_state = EXPR_BEG;
2585 return tOP_ASGN;
2586 }
2587 if (IS_SPCARG(bc)) {
2588 yywarning("`&' interpreted as argument prefix");
2589 c = tAMPER;
2590 } else if (IS_BEG())
2591 c = tAMPER;
2592 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2593 break;
2594 case '|':
2595 bc = nextc();
2596 if (bc == '|') {
2597 lex_state = EXPR_BEG;
2598 if (nextc() == '=')
2599 return tOP_ASGN;
2600 pushback();
2601 return tOR;
2602 }
2603 if (bc == '=') {
2604 lex_state = EXPR_BEG;
2605 return tOP_ASGN;
2606 }
2607 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2608 break;
2609 case '.':
2610 lex_state = EXPR_BEG;
2611 bc = nextc();
2612 if (bc == '.') {
2613 if (nextc() == '.')
2614 return tDOT3;
2615 pushback();
2616 return tDOT2;
2617 }
2618 lex_state = EXPR_DOT;
2619 break;
2620 case ':':
2621 bc = nextc();
2622 if (bc == ':') {
2623 if (IS_BEG() || IS_lex_state(EXPR_CLASS) || IS_SPCARG(-1)) {
2624 lex_state = EXPR_BEG;
2625 return tCOLON3;
2626 }
2627 lex_state = EXPR_DOT;
2628 return tCOLON2;
2629 }
2630 if (IS_END() || isspace(bc)) {
2631 lex_state = EXPR_BEG;
2632 break;
2633 }
2634 lex_state = EXPR_FNAME;
2635 pushback();
2636 return tSYMBEG;
2637 case '^':
2638 if (nextc() == '=') {
2639 lex_state = EXPR_BEG;
2640 return tOP_ASGN;
2641 }
2642 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2643 break;
2644 case ';':
2645 command_start = 1;
2646 /* fallthrough */
2647 case ',':
2648 lex_state = EXPR_BEG;
2649 return c;
2650 case '?':
2651 if (IS_END()) {
2652 lex_state = EXPR_VALUE;
2653 return c;
2654 }
2655 bc = nextc();
2656 if (isspace(bc)) {
2657 lex_state = EXPR_VALUE;
2658 break;
2659 }
2660 if (bc == '\\')
2661 nextc();
2662 if (IS_BEG()) {
2663 lex_state = EXPR_END;
2664 return tCHAR;
2665 }
2666 if (is_blank(*parser->lex_p) || *parser->lex_p == ':') {
2667 lex_state = EXPR_VALUE;
2668 break;
2669 }
2670 lex_state = EXPR_END;
2671 return tCHAR;
2672 case '`':
2673 if (IS_lex_state(EXPR_FNAME)) {
2674 lex_state = EXPR_ENDFN;
2675 return c;
2676 }
2677 if (IS_lex_state(EXPR_DOT)) {
2678 lex_state = (cmd_state) ? EXPR_CMDARG : EXPR_ARG;
2679 return c;
2680 }
2681 /* fallthrough */
2682 case '"':
2683 space_seen = 1;
2684 /* fallthrough */
2685 case '\'':
2686 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2687 lex_strterm->term = c;
2688 lex_strterm->can_embed = space_seen;
2689 lex_strterm->token = token_string;
2690 lex_strterm->word = NULL;
2691 lex_strterm->nestable = 0;
2692 lex_strterm->paren = 0;
2693 return tSTRING_BEG;
2694 case '\\':
2695 if (nextc() == '\n') {
2696 space_seen = 1;
2697 goto retry;
2698 }
2699 pushback();
2700 return c;
2701 case '(':
2702 if (IS_BEG())
2703 c = tLPAREN;
2704 else if (IS_SPCARG(-1))
2705 c = tLPAREN_ARG;
2706 parser->paren_nest++;
2707 lex_state = EXPR_BEG;
2708 COND_PUSH(0);
2709 CMDARG_PUSH(0);
2710 return c;
2711 case ')':
2712 parser->paren_nest--;
2713 lex_state = EXPR_ENDFN;
2714 CMDARG_LEXPOP();
2715 COND_LEXPOP();
2716 return c;
2717 case '{':
2718 if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) {
2719 lex_state = EXPR_BEG;
2720 parser->lpar_beg = 0;
2721 parser->paren_nest--;
2722 COND_PUSH(0);
2723 CMDARG_PUSH(0);
2724 push_pos(parser, tokp);
2725 if (parser->version < ruby19) {
2726 yywarning("\"->\" syntax is only available in Ruby 1.9.x or higher.");
2727 }
2728 return tLAMBEG; /* this is a lambda ->() {} construction */
2729 }
2730 if (IS_ARG() || IS_lex_state(EXPR_END | EXPR_ENDFN))
2731 c = '{';
2732 else if (IS_lex_state(EXPR_ENDARG))
2733 c = tLBRACE_ARG; /* block (expr) */
2734 else
2735 c = tLBRACE; /* smells like a hash */
2736 COND_PUSH(0);
2737 CMDARG_PUSH(0);
2738 lex_state = EXPR_BEG;
2739 if (c != tLBRACE) {
2740 push_pos(parser, tokp);
2741 command_start = 1;
2742 }
2743 return c; /* block (primary) */
2744 case '}':
2745 CMDARG_LEXPOP();
2746 COND_LEXPOP();
2747 tokp.end_line = parser->line;
2748 tokp.end_col = parser->column;
2749 push_pos(parser, tokp);
2750 lex_state = EXPR_ENDARG;
2751 return c;
2752 case '@':
2753 cp = lexbuf;
2754 *cp++ = c;
2755 c = nextc();
2756 if (c != '@') {
2757 bc = IVAR;
2758 } else {
2759 *cp++ = c;
2760 c = nextc();
2761 bc = CVAR;
2762 }
2763 goto talpha;
2764 case '$':
2765 tokp.end_line = parser->line;
2766 cp = lexbuf;
2767 *cp++ = c;
2768 bc = nextc();
2769 switch (bc) {
2770 case '1': case '2': case '3': case '4':
2771 case '5': case '6': case '7': case '8': case '9':
2772 c = bc;
2773 while (isdigit(c)) {
2774 *cp++ = c;
2775 c = nextc();
2776 }
2777 *cp = '\0';
2778 pushback();
2779 c = tNTH_REF;
2780 break;
2781 case '~': case '*': case '$': case '?': case '!': case '@':
2782 case '/': case '\\': case ';': case ',': case '.': case '=':
2783 case ':': case '<': case '>': case '\"':
2784 case '&': case '`': case '\'': case '+':
2785 case '0':
2786 c = GLOBAL;
2787 *cp++ = bc;
2788 *cp = '\0';
2789 break;
2790 case '-':
2791 c = nextc();
2792 *cp++ = bc;
2793 bc = GLOBAL;
2794 goto talpha;
2795 default:
2796 c = bc;
2797 bc = GLOBAL;
2798 goto talpha;
2799 }
2800 lex_state = EXPR_END;
2801 tokp.end_col = parser->column;
2802 push_pos(parser, tokp);
2803 push_stack(parser, lexbuf);
2804 return c;
2805 case '~':
2806 if (IS_AFTER_OPERATOR()) {
2807 bc = nextc();
2808 if (bc != '@')
2809 pushback();
2810 lex_state = EXPR_ARG;
2811 } else
2812 lex_state = EXPR_BEG;
2813 return c;
2814 default:
2815 cp = lexbuf;
2816 goto talpha;
2817 }
2818 pushback();
2819 return c;
2820
2821 talpha:
2822 {
2823 int step = 0;
2824 int ax = 0;
2825
2826 /* It's time to parse the word */
2827 while (not_sep(parser->lex_prev)) {
2828 step = utf8_charsize(parser->lex_prev);
2829 ax += step - 1;
2830 while (step-- > 0) {
2831 *cp++ = c;
2832 c = nextc();
2833 }
2834 if (c < 0) {
2835 parser->eof_reached = 1;
2836 break;
2837 }
2838 }
2839 *cp = '\0';
2840 parser->column -= ax;
2841 tokp.end_line = tokp.start_line;
2842 tokp.end_col = parser->lex_prevc - ax;
2843 pushback();
2844
2845 /* IVAR, CVAR, GLOBAL */
2846 if (bc > 0) {
2847 push_pos(parser, tokp);
2848 push_stack(parser, lexbuf);
2849 lex_state = EXPR_END;
2850 return bc;
2851 }
2852
2853 /* Check for '!', '?' and '=' at the end of the word */
2854 if (c == '!' || c == '?') {
2855 *cp++ = c;
2856 *cp = '\0';
2857 tokp.end_col++;
2858 nextc();
2859 c = BASE;
2860 } else {
2861 c = 0;
2862 if (IS_lex_state(EXPR_FNAME)) {
2863 bc = nextc();
2864 if (bc == '=') {
2865 bc = nextc();
2866 if (bc != '>') {
2867 *cp++ = '=';
2868 *cp = '\0';
2869 tokp.end_col++;
2870 c = BASE;
2871 } else
2872 pushback();
2873 }
2874 pushback();
2875 }
2876 c = (!c && isupper(lexbuf[0])) ? CONST : BASE;
2877 }
2878
2879 /* Check if this is just a hash key. */
2880 if (IS_LABEL_POSSIBLE() && IS_LABEL_SUFFIX()) {
2881 lex_state = EXPR_BEG;
2882 nextc();
2883 push_stack(parser, lexbuf);
2884 push_pos(parser, tokp);
2885 return tKEY;
2886 }
2887
2888 /* Check if this is a keyword */
2889 const struct kwtable *kw = NULL;
2890 if (!IS_lex_state(EXPR_DOT)) {
2891 kw = rb_reserved_word(lexbuf, cp - lexbuf);
2892 if (kw) {
2893 enum lex_state_e state = lex_state;
2894 lex_state = kw->state;
2895 if (state == EXPR_FNAME)
2896 return kw->id[0];
2897 if (lex_state == EXPR_BEG)
2898 command_start = 1;
2899 switch (kw->id[0]) {
2900 case tCLASS: case tMODULE: case tDEF:
2901 push_last_comment(parser);
2902 break;
2903 case tDO:
2904 if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) {
2905 parser->lpar_beg = 0;
2906 parser->paren_nest--;
2907 return tDO_LAMBDA;
2908 }
2909 if (COND_P())
2910 return tDO_COND;
2911 push_pos(parser, tokp);
2912 if (CMDARG_P() && state != EXPR_CMDARG)
2913 return tDO_BLOCK;
2914 return tDO;
2915 case tEND:
2916 push_pos(parser, tokp);
2917 break;
2918 }
2919 if (IS_lex_state_for(state, EXPR_BEG | EXPR_VALUE))
2920 return kw->id[0];
2921 else {
2922 if (kw->id[0] != kw->id[1])
2923 lex_state = EXPR_BEG;
2924 return kw->id[1];
2925 }
2926 }
2927 }
2928
2929 /* Maybe this is just some special method */
2930 if (is_special_method(lexbuf)) {
2931 if (!strcmp(lexbuf, "__END__")) {
2932 parser->eof_reached = 1;
2933 return tpEND;
2934 }
2935 }
2936
2937 /* If this is not a keyword, push its position and the name */
2938 if (!kw) {
2939 push_stack(parser, lexbuf);
2940 push_pos(parser, tokp);
2941 }
2942
2943 /* Update the state of the lexer */
2944 if (IS_lex_state(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT))
2945 lex_state = (cmd_state) ? EXPR_CMDARG : EXPR_ARG;
2946 else if (lex_state == EXPR_FNAME)
2947 lex_state = EXPR_ENDFN;
2948 else
2949 lex_state = EXPR_END;
2950 return c;
2951 }
2952
2953 tnum:
2954 {
2955 char hex, bin, has_point, aux;
2956 hex = bin = has_point = aux = 0;
2957
2958 lex_state = EXPR_END;
2959 if (c == '0') {
2960 bc = nextc();
2961 if (toupper(bc) == 'X') {
2962 hex = 1;
2963 c = nextc();
2964 } else if (toupper(bc) == 'B') {
2965 bin = 1;
2966 c = nextc();
2967 }
2968 pushback();
2969 }
2970 while (c > 0 && ((isdigit(c) && !bin) || (!hex && !bin && !has_point && c == '.')
2971 || (hex && toupper(c) >= 'A' && toupper(c) < 'G')
2972 || (bin && (c == '1' || c == '0')) || c == '_')) {
2973 if (c == '.') {
2974 if (!isdigit(*parser->lex_p)) {
2975 tokp.end_line = parser->line;
2976 tokp.end_col = parser->column - 1;
2977 pushback();
2978 return tINTEGER;
2979 }
2980 has_point = 1;
2981 }
2982 aux = 1;
2983 c = nextc();
2984 }
2985 if ((bin || hex) && !aux)
2986 yyerror(parser, "numeric literal without digits");
2987
2988 /* is it an exponential number ? */
2989 if (!bin && !hex && toupper(c) == 'E') {
2990 c = nextc();
2991 if (isdigit(c) || ((c == '+' || c == '-') && isdigit(*(parser->lex_p))))
2992 c = nextc();
2993 while (c != -1 && isdigit(c))
2994 c = nextc();
2995 }
2996
2997 if (c != -1)
2998 pushback();
2999 tokp.end_line = parser->line + 1;
3000 tokp.end_col = parser->column + 1;
3001 if (c == 'r') {
3002 nextc();
3003 return tRATIONAL;
3004 } else if (c == 'i') {
3005 nextc();
3006 return tIMAGINARY;
3007 }
3008 tokp.end_line--;
3009 tokp.end_col--;
3010 return (has_point) ? tFLOAT : tINTEGER;
3011 }
3012 }
3013
/*
 * Standard yylex. Thin adapter between the generated parser and
 * parser_yylex(): `p` is the struct parser_t being used. With a pure parser
 * a semantic value pointer is passed as well; it is not used.
 */
#if YYPURE
static int yylex(void *lval, void *p)
#else
static int yylex(void *p)
#endif
{
    struct parser_t *parser = (struct parser_t *) p;

#if YYPURE
    /* `lval` only exists in the pure variant of the signature */
    _unused_(lval);
#endif

    return parser_yylex(parser);
}
3028
3029 /*
3030 * Error handling. Take the formmated string s and append the error
3031 * string to the list of errors p->errors.
3032 */
3033 static void yyerror(struct parser_t *parser, const char *s)
3034 {
3035 struct error_t *e = (struct error_t *) malloc(sizeof(struct error_t));
3036
3037 e->msg = strdup(s);
3038 e->line = parser->line;
3039 e->column = parser->column;
3040 e->warning = parser->warning;
3041 e->next = e;
3042 if (parser->errors)
3043 parser->last_error->next = e;
3044 else
3045 parser->errors = e;
3046 parser->last_error = e;
3047 parser->last_error->next = NULL;
3048
3049 parser->eof_reached = !e->warning;
3050 if (!parser->unrecoverable)
3051 parser->unrecoverable = !e->warning;
3052 }
3053
3054 struct ast_t * rb_compile_file(struct options_t *opts)
3055 {
3056 struct parser_t p;
3057 struct ast_t *result;
3058
3059 /* Initialize parser */
3060 init_parser(&p);
3061 p.version = opts->version;
3062 if (!opts->contents) {
3063 if (!retrieve_source(&p, opts->path))
3064 return NULL;
3065 } else {
3066 p.content_given = 1;
3067 p.length = strlen(opts->contents);
3068 p.blob = opts->contents;
3069 p.lex_p = opts->contents;
3070 }
3071
3072 /* Let's parse */
3073 result = (struct ast_t *) malloc(sizeof(struct ast_t));
3074 result->tree = NULL;
3075 result->unrecoverable = 0;
3076 for (;;) {
3077 yyparse(&p);
3078 if (p.ast != NULL) {
3079 if (result->tree == NULL)
3080 result->tree = p.ast;
3081 else
3082 update_list(result->tree, p.ast);
3083 }
3084 if (p.eof_reached) {
3085 result->errors = p.errors;
3086 result->unrecoverable = p.unrecoverable;
3087 break;
3088 }
3089 }
3090 free_parser(&p);
3091
3092 return result;
3093 }
3094
3095 #ifdef BUILD_TESTS
3096 /*
3097 * Compile a file like the rb_compile_file function but printing
3098 * things directly to the stdout. This function is used for the tests.
3099 */
3100 KDEVRUBYPARSER_EXPORT int rb_debug_file(struct options_t *opts);
3101
3102 int rb_debug_file(struct options_t *opts)
3103 {
3104 struct parser_t p;
3105 int index;
3106
3107 /* Set up parser */
3108 init_parser(&p);
3109 p.version = opts->version;
3110 if (!retrieve_source(&p, opts->path))
3111 return 0;
3112
3113 printf("Resulting AST's:");
3114 for (;;) {
3115 printf("\n");
3116 yyparse(&p);
3117 print_node(p.ast);
3118 if (p.ast != NULL) {
3119 if (p.ast->cond != NULL) {
3120 printf("\nCondition: ");
3121 print_node(p.ast->cond);
3122 }
3123 if (p.ast->l != NULL && p.ast->l->ensure != NULL) {
3124 if (p.ast->l->cond != NULL) {
3125 printf("\nCondition: ");
3126 print_node(p.ast->l->cond);
3127 }
3128 printf("\nEnsure: ");
3129 print_node(p.ast->l->ensure);
3130 }
3131 free_ast(p.ast);
3132 p.ast = NULL;
3133 }
3134 if (p.eof_reached) {
3135 if (p.errors)
3136 print_errors(p.errors);
3137 break;
3138 }
3139 }
3140
3141 /* Check that all the stacks are empty */
3142 for (index = 0; index < p.sp; index++)
3143 printf("\nS: %s", p.stack[index]);
3144 printf("\n");
3145
3146 for (index = 0; index < p.pos_size; index++)
3147 printf("\nP: %i:%i", p.pos_stack[index].start_line, p.pos_stack[index].start_col);
3148 printf("\n");
3149 free_parser(&p);
3150 return 1;
3151 }
3152 #endif