kdev-ruby/parser/parser.y

0001 /* This file is part of KDevelop
0002  *
0003  * This file is based on the file parse.y from the MRI, version 1.9.2-p136.
0004  * So, at this point I must recognize the amazing job ruby developers
0005  * are doing and specially Yukihiro Matsumoto, the Ruby original author
0006  * and the one who signed parse.y.
0007  *
0008  * Copyright (C) 1993-2007 Yukihiro Matsumoto
0009  * Copyright (C) 2010-2015 Miquel Sabaté Solà <mikisabate@gmail.com>
0010  *
0011  * This program is free software: you can redistribute it and/or modify
0012  * it under the terms of the GNU General Public License as published by
0013  * the Free Software Foundation, either version 3 of the License, or
0014  * (at your option) any later version.
0015  *
0016  * This program is distributed in the hope that it will be useful,
0017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0019  * GNU General Public License for more details.
0020  *
0021  * You should have received a copy of the GNU General Public License
0022  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0023  */
0024
0025
0026 %{
0027 /* make clang happy */
0028 #ifndef _MSC_VER
0029 extern char *strdup(const char *s);
0030 #endif
0031
0032 /* for alloca */
0033 #ifdef _WIN32
0034 #include <malloc.h>
0035 #elif !defined(__FreeBSD__) /* alloca() on FreeBSD is in stdlib.h (included later) */
0036 #include <alloca.h>
0037 #endif
0038
0039 #include <stdio.h>
0040 #include <stdlib.h>
0041 #include <string.h>
0042
0043 #include "node.h"
0044
0045
0046 #define SSIZE 256               /* number of slots in parser_t.comment_stack */
0047 #define LSIZE (SSIZE << 2)      /* SSIZE * 4 = 1024 */
0048
0049
0050 /* Bit positions of the lexer states, extracted from the MRI. Each value is the shift that DEF_EXPR uses to build the matching EXPR_* mask. */
0051 enum lex_state_bits {
0052     EXPR_BEG_bit,               /* ignore newline, +/- is a sign. */
0053     EXPR_END_bit,               /* newline significant, +/- is an operator. */
0054     EXPR_ENDARG_bit,            /* ditto, and unbound braces. */
0055     EXPR_ENDFN_bit,             /* ditto, and unbound braces. */
0056     EXPR_ARG_bit,               /* newline significant, +/- is an operator. */
0057     EXPR_CMDARG_bit,            /* newline significant, +/- is an operator. */
0058     EXPR_MID_bit,               /* newline significant, +/- is an operator. */
0059     EXPR_FNAME_bit,             /* ignore newline, no reserved words. */
0060     EXPR_DOT_bit,               /* right after `.' or `::', no reserved words. */
0061     EXPR_CLASS_bit,             /* immediate after `class', no here document. */
0062     EXPR_VALUE_bit,             /* alike EXPR_BEG but label is disallowed. */
0063 };
0064
0065 /* This enum defines the states in which the lexer can be. Every state is a distinct single-bit mask, so states can be OR-ed together and tested with `&'. */
0066 enum lex_state_e {
0067 #define DEF_EXPR(n) EXPR_##n = (1 << EXPR_##n##_bit) /* EXPR_n is 1 shifted by EXPR_n_bit */
0068     DEF_EXPR(BEG),
0069     DEF_EXPR(END),
0070     DEF_EXPR(ENDARG),
0071     DEF_EXPR(ENDFN),
0072     DEF_EXPR(ARG),
0073     DEF_EXPR(CMDARG),
0074     DEF_EXPR(MID),
0075     DEF_EXPR(FNAME),
0076     DEF_EXPR(DOT),
0077     DEF_EXPR(CLASS),
0078     DEF_EXPR(VALUE),
0079     EXPR_BEG_ANY  =  (EXPR_BEG | EXPR_VALUE | EXPR_MID | EXPR_CLASS), /* union of BEG, VALUE, MID and CLASS */
0080     EXPR_ARG_ANY  =  (EXPR_ARG | EXPR_CMDARG), /* union of ARG and CMDARG */
0081     EXPR_END_ANY  =  (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) /* union of END, ENDARG and ENDFN */
0082 };
0083
0084 /* Helper macros for handling the lexer states. Both yield non-zero when at least one bit of `ls' is set in the tested state. */
0085 #define IS_lex_state_for(x, ls) ((x) & (ls)) /* test an explicit state value `x' */
0086 #define IS_lex_state(ls)        IS_lex_state_for(lex_state, (ls)) /* test lex_state, which is itself a macro for parser->lex_state */
0087
0088 /* And now some macros that will help us on some stacks of the parser. Each stack is an integer used as a stack of bits, bit 0 being the top. The `stack' argument is evaluated more than once. */
0089 #define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1)) /* push the lowest bit of n */
0090 #define BITSTACK_POP(stack)     ((stack) = (stack) >> 1) /* drop the top bit */
0091 #define BITSTACK_LEXPOP(stack)  ((stack) = ((stack) >> 1) | ((stack) & 1)) /* drop the top bit, but OR it into the new top */
0092 #define BITSTACK_SET_P(stack)   ((stack)&1) /* non-zero when the top bit is set */
0093
0094 #define COND_PUSH(n)    BITSTACK_PUSH(parser->cond_stack, (n)) /* the COND_* macros operate on parser->cond_stack and need `parser' in scope */
0095 #define COND_POP()      BITSTACK_POP(parser->cond_stack)
0096 #define COND_LEXPOP()   BITSTACK_LEXPOP(parser->cond_stack)
0097 #define COND_P()        BITSTACK_SET_P(parser->cond_stack)
0098
0099 #define CMDARG_PUSH(n)  BITSTACK_PUSH(parser->cmdarg_stack, (n)) /* the CMDARG_* macros operate on parser->cmdarg_stack and need `parser' in scope */
0100 #define CMDARG_POP()    BITSTACK_POP(parser->cmdarg_stack)
0101 #define CMDARG_LEXPOP() BITSTACK_LEXPOP(parser->cmdarg_stack)
0102 #define CMDARG_P()      BITSTACK_SET_P(parser->cmdarg_stack)
0103
0104
0105 /*
0106  * This structure represents a string/heredoc/regexp/shortcut term. It is the type behind the `term' member of %union and behind parser_t.lex_strterm.
0107  */
0108 struct term_t {
0109     int token;                      /* NOTE(review): presumably the token kind of the literal; confirm in the lexer */
0110     char *word;
0111     int nest;                       /* NOTE(review): presumably the current nesting depth of `paren'; confirm in the lexer */
0112     unsigned char term;             /* NOTE(review): presumably the closing delimiter; confirm in the lexer */
0113     unsigned char paren;            /* NOTE(review): presumably the opening delimiter; confirm in the lexer */
0114     unsigned char can_embed : 1;    /* one-bit flag */
0115     unsigned char nestable : 1;     /* one-bit flag */
0116 };
0117
0118 /*
0119  * This structure contains a comment on the code. It basically stores
0120  * the comment itself in a dynamically allocated char pointer, and the
0121  * line where it was found.
0122  */
0123 struct comment_t {
0124     char *comment;  /* dynamically allocated text of the comment */
0125     int line;       /* line where the comment was found */
0126 };
0127
0128 /*
0129  * This structure defines all the information that the parser has.
0130  * It contains the AST, flags, stacks, etc.
0131  */
0132 struct parser_t {
0133     /* Abstract Syntax Tree */
0134     struct Node *ast;                   /* assigned by the top_compstmt rule */
0135
0136     /* Stack of positions */
0137     struct pos_t *pos_stack;
0138     int stack_scale;
0139     int pos_size;
0140
0141     /* Flags used by the parser */
0142     unsigned char eof_reached : 1;
0143     unsigned int cond_stack;            /* bit stack driven by the COND_* macros */
0144     unsigned int cmdarg_stack;          /* bit stack driven by the CMDARG_* macros */
0145     int in_def;                         /* incremented when a `def' starts; rules test it to reject constructs inside a method */
0146     int paren_nest;
0147     int lpar_beg;
0148     int parser_command_start;           /* reached through the command_start macro */
0149     enum ruby_version version;
0150
0151     /* Stuff from the lexer */
0152     enum lex_state_e lex_state;         /* reached through the lex_state macro */
0153     struct term_t *lex_strterm;         /* reached through the lex_strterm macro */
0154     char *lex_p;
0155     char *lex_prev;
0156     char *lex_pend;
0157     unsigned long lex_prevc;
0158
0159     /* Basically used to handle heredocs properly */
0160     unsigned long line_pend;
0161     unsigned long column_pend;
0162     unsigned char here_found : 1;
0163
0164     /* Errors on the file */
0165     struct error_t *errors;
0166     struct error_t *last_error;
0167     unsigned char warning : 1;          /* raised by yywarning() around its yyerror() call */
0168     unsigned char unrecoverable : 1;
0169
0170     /* Stack of names */
0171     char *stack[2];
0172     char *aux;                          /* operator text stored by copy_op() */
0173     int sp;
0174
0175     /* The last allocated comment + the comment stack    */
0176     struct comment_t last_comment;
0177     char *comment_stack[SSIZE];
0178     int comment_index;
0179
0180     /* Info about the content to parse */
0181     unsigned long length;
0182     unsigned long line;                 /* read by grammar actions to fill node positions */
0183     unsigned long column;               /* read by grammar actions to fill node positions */
0184     unsigned char content_given : 1;
0185     char *blob;
0186 };
0187
0188 #include "parser_gen.h"
0189 #define yyparse ruby_yyparse /* the generated parser entry point is named ruby_yyparse */
0190 #define YYERROR_VERBOSE 1 /* ask Bison for verbose syntax error messages */
0191
0192 /* Macros to access some attributes in a fancier way. They expand to members of `parser', so a `struct parser_t *parser' must be in scope wherever they are used. */
0193 #define lex_strterm parser->lex_strterm
0194 #define lex_state parser->lex_state
0195 #define command_start parser->parser_command_start
0196
0197 /* yy's functions. yylex takes two arguments when the parser is pure (YYPURE) and one otherwise. */
0198 #if YYPURE
0199 static int yylex(void *, void *);
0200 #else
0201 static int yylex(void *);
0202 #endif
0203 static void yyerror(struct parser_t *, const char *);
0204 #define yywarning(msg) { parser->warning = 1; yyerror(parser, (msg)); parser->warning = 0;} /* report msg through yyerror() with parser->warning raised; needs `parser' in scope. NOTE(review): expands to a bare brace block, so `if (c) yywarning(m); else ...' would not compile */
0205
0206 /* The static functions below deal with stacks. They are only declared here; the notes list the uses visible in the grammar. */
0207 static void pop_stack(struct parser_t *parser, struct Node *n); /* used by POP_STACK and by the global alias rule */
0208 static void push_last_comment(struct parser_t *parser);
0209 static void pop_comment(struct parser_t *parser, struct Node *n); /* called on class, module and def nodes */
0210 static void pop_pos(struct parser_t *parser, struct Node *n); /* called with n == NULL by discard_pos() */
0211 static void pop_start(struct parser_t *parser, struct Node *n); /* called on the node built by cmd_brace_block */
0212 static void pop_end(struct parser_t *parser, struct Node *n); /* called on the node built by bodystmt */
0213
0214 /* Helper macros for nodes, positions and stacks. Apart from the DISPOSE macros they rely on `parser' (and some on `yyval') being in scope, i.e. they are meant for grammar actions. */
0215 #define ALLOC_N(kind, l, r) alloc_node(kind, l, r); pop_pos(parser, yyval.n); /* expands to two statements: when assigned to the rule value, the node is stored first and pop_pos() then runs on it */
0216 #define DISPOSE2(node1, node2) { free_ast(node1); free_ast(node2); } /* free_ast() on both nodes */
0217 #define DISPOSE3(node1, node2, node3) { DISPOSE2(node1, node2); free_ast(node3); } /* free_ast() on all three nodes */
0218 #define POP_STACK pop_stack(parser, yyval.n) /* pop_stack() on the value of the rule being reduced */
0219 #define discard_pos() pop_pos(parser, NULL) /* pop_pos() with no node; presumably just drops the position, confirm in pop_pos() */
0220 #define copy_op(op) { parser->aux = strdup(op); } /* heap copy of the operator text; the `op' alternative of fname stores this pointer as the node name */
0221 %}
0222
0223 %pure-parser /* reentrant parser: no global semantic value, yylex receives it by pointer */
0224 %lex-param {struct parser_t *parser } /* extra argument handed to yylex */
0225 %parse-param { struct parser_t *parser } /* extra argument of the parser function and of yyerror */
0226 %union {
0227     struct Node *n;         /* AST node; the type of every nonterminal listed under "Types" */
0228     int num;                /* plain integer saved by mid-rule actions (line, column, cmdarg_stack) */
0229     struct term_t *term;    /* string/heredoc/regexp/shortcut term */
0230 }
0231
0232 /* Tokens */
0233 %token tCLASS tMODULE tDEF tUNDEF tBEGIN tRESCUE tENSURE tEND tIF tUNLESS
0234 %token tTHEN tELSIF tELSE tCASE tWHEN tWHILE tUNTIL tFOR tBREAK tNEXT tREDO
0235 %token tRETRY tIN tDO tDO_COND tDO_BLOCK tRETURN tYIELD tKWAND tKWOR tKWNOT
0236 %token tALIAS tDEFINED upBEGIN upEND tTRUE tFALSE tNIL tENCODING tDSTAR
0237 %token tFILE tLINE tSELF tSUPER GLOBAL BASE CONST tDO_LAMBDA tCHAR tIMAGINARY
0238 %token IVAR CVAR tINTEGER tFLOAT tNTH_REF tBACKTICK tpEND tSYMBEG tRATIONAL
0239 %token tAMPER tAREF tASET tASSOC tCOLON2 tCOLON3 tLAMBDA tLAMBEG tLBRACE
0240 %token tLBRACKET tLPAREN tLPAREN_ARG tSTAR tCOMMENT ARRAY tKEY SYMBOL tUMINUS_NUM
0241 %token tSTRING_BEG tSTRING_CONTENT tSTRING_DBEG tSTRING_DEND tSTRING_END tSTRING_DVAR
0242
0243 /* Types */
0244 %type <n> singleton strings string literal numeric cpath rescue_arg
0245 %type <n> top_compstmt top_stmt bodystmt compstmt stmts stmt expr arg primary
0246 %type <n> command command_call method_call if_tail opt_else case_body cases
0247 %type <n> opt_rescue exc_list exc_var opt_ensure args call_args opt_call_args
0248 %type <n> paren_args opt_paren_args super aref_args opt_block_arg block_arg
0249 %type <n> mrhs superclass block_call block_command f_block_optarg f_block_opt
0250 %type <n> const f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list
0251 %type <n> f_margs assoc_list assocs assoc undef_list backref for_var bvar base
0252 %type <n> block_param opt_block_param block_param_def f_opt bv_decls label none
0253 %type <n> lambda f_larglist lambda_body command_args opt_bv_decl lhs do_block
0254 %type <n> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner
0255 %type <n> fsym variable symbol operation operation2 operation3 other_vars
0256 %type <n> cname fname f_rest_arg f_block_arg opt_f_block_arg f_norm_arg
0257 %type <n> brace_block cmd_brace_block f_bad_arg sym opt_brace_block
0258 %type <n> opt_args_tail args_tail f_kwarg block_args_tail opt_block_args_tail
0259 %type <n> f_kw f_block_kw f_block_kwarg f_kwrest simple_numeric
0260 %type <n> string_contents string_content string_dvar
0261
0262 /* When an error has been found, free all the nodes from bison's stacks */
0263 %destructor { free_ast($$); } <n>
0264
0265 /* precedence table: the first line has the lowest precedence, each later line binds tighter */
0266 %nonassoc tLOWEST
0267 %nonassoc tLBRACE_ARG
0268
0269 %nonassoc modifier_if modifier_unless modifier_while modifier_until
0270 %left tKWOR tKWAND
0271 %right tKWNOT
0272 %nonassoc tDEFINED
0273 %right '=' tOP_ASGN
0274 %left modifier_rescue
0275 %right '?' ':'
0276 %nonassoc tDOT2 tDOT3
0277 %left tOR
0278 %left tAND
0279 %nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
0280 %left '>' tGEQ '<' tLEQ
0281 %left '|' '^'
0282 %left '&'
0283 %left tLSHIFT tRSHIFT
0284 %left '+' '-'
0285 %left '*' '/' '%'
0286 %right tUMINUS_NUM tUMINUS
0287 %right tPOW
0288 %right '!' '~' tUPLUS
0289
0290 %%
0291
0292 top_compstmt: top_stmt  { parser->ast = $1; $$ = 0; YYACCEPT; } /* publish the statement in parser->ast and accept right after this single top-level statement */
0293     | term              { $$ = 0; YYACCEPT; } /* a lone terminator: accept without touching parser->ast */
0294 ;
0295
0296 top_stmt: none
0297     | stmt
0298     | error stmt { $$ = $2; } /* error recovery: continue with the statement that follows the error */
0299 ;
0300
0301 bodystmt: /* body used by begin, class, module and def: statements plus optional rescue, else and ensure parts */
0302     {
0303         $<num>$ = parser->line;
0304     } /* mid-rule action, component 1: line where the body starts */
0305     {
0306         $<num>$ = parser->column;
0307     } /* mid-rule action, component 2: column where the body starts */
0308     compstmt opt_rescue opt_else opt_ensure
0309     {
0310         $$ = alloc_ensure(token_body, $3, $4, $5, $6);
0311         pop_end(parser, $$); /* Every bodystmt ends with tEND */
0312         $$->pos.start_line = $<num>1;
0313         $$->pos.start_col = $<num>2;
0314     } /* the start position comes from the two values saved above */
0315 ;
0316
0317 compstmt: stmts opt_terms { $$ = $1; } /* a statement list with optional trailing terminators */
0318 ;
0319
0320 stmts: none
0321     | stmt
0322     | stmts terms stmt  { $$ = ($1 == NULL) ? $3 : update_list($1, $3); } /* when the list so far is empty the new statement becomes the list, otherwise it is added through update_list() */
0323     | error stmt        { $$ = $2; } /* error recovery: continue with the statement that follows the error */
0324 ;
0325
0326 stmt: tALIAS fsym { lex_state = EXPR_FNAME; } fsym /* the mid-rule action is component 3, so the second name is component 4 */
0327     {
0328         $$ = alloc_node(token_alias, $2, $4);
0329     }
0330     | tALIAS GLOBAL GLOBAL
0331     {
0332         /* Ugly as hell, but it works */
0333         struct Node *l = alloc_node(token_object, NULL, NULL);
0334         l->flags = global;
0335         struct Node *r = alloc_node(token_object, NULL, NULL);
0336         r->flags = global;
0337         pop_pos(parser, r);
0338         pop_pos(parser, l);
0339         pop_stack(parser, l);
0340         pop_stack(parser, r);
0341         $$ = alloc_node(token_alias, l, r);
0342     }
0343     | tALIAS GLOBAL tNTH_REF
0344     {
0345         yyerror(parser, "can't make alias for the number variables");
0346         $$ = 0;
0347     }
0348     | tUNDEF undef_list
0349     {
0350         $$ = alloc_node(token_undef, NULL, $2);
0351     }
0352     | stmt modifier_if expr
0353     {
0354         $$ = alloc_cond(token_if, $3, $1, NULL);
0355     } /* for every modifier form the condition is passed before the guarded statement */
0356     | stmt modifier_unless expr
0357     {
0358         $$ = alloc_cond(token_unless, $3, $1, NULL);
0359     }
0360     | stmt modifier_while expr
0361     {
0362         $$ = alloc_cond(token_while, $3, $1, NULL);
0363     }
0364     | stmt modifier_until expr
0365     {
0366         $$ = alloc_cond(token_until, $3, $1, NULL);
0367     }
0368     | stmt modifier_rescue stmt
0369     {
0370         $$ = alloc_cond(token_rescue, $3, $1, NULL);
0371     }
0372     | upBEGIN
0373     {
0374         if (parser->in_def)
0375             yyerror(parser, "BEGIN in method");
0376     } /* mid-rule action is component 2, so the body below is component 4 */
0377     '{' compstmt '}'
0378     {
0379         $$ = alloc_node(token_up_begin, $4, NULL);
0380         discard_pos(); /* } */
0381         discard_pos(); /* { */
0382     }
0383     | upEND '{' compstmt '}'
0384     {
0385         $$ = alloc_node(token_up_end, $3, NULL);
0386         discard_pos(); /* } */
0387         discard_pos(); /* { */
0388     }
0389     | lhs '=' command_call  { $$ = alloc_node(token_assign, $1, $3); }
0390     | mlhs '=' command_call { $$ = alloc_node(token_assign, $1, $3); }
0391     | variable tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
0392     | primary '[' opt_call_args rbracket tOP_ASGN command_call
0393     {
0394         struct Node *aux = alloc_node(token_array_value, $1, $3);
0395         $$ = alloc_node(token_op_assign, aux, $6);
0396     }
0397     | primary '.' base tOP_ASGN command_call
0398     {
0399         struct Node *aux = alloc_node(token_object, $1, $3);
0400         $$ = alloc_node(token_op_assign, aux, $5);
0401     }
0402     | primary '.' const tOP_ASGN command_call
0403     {
0404         struct Node *aux = alloc_node(token_object, $1, $3);
0405         $$ = alloc_node(token_op_assign, aux, $5);
0406     }
0407     | primary tCOLON2 const tOP_ASGN command_call
0408     {
0409         yyerror(parser, "constant re-assignment");
0410         $$ = NULL;
0411         DISPOSE3($1, $3, $5);
0412     } /* rejected: the three sub-trees are freed here because no node takes ownership of them */
0413     | primary tCOLON2 base tOP_ASGN command_call
0414     {
0415         struct Node *aux = alloc_node(token_object, $1, $3);
0416         $$ = alloc_node(token_op_assign, aux, $5);
0417     }
0418     | backref tOP_ASGN command_call { $$ = alloc_node(token_op_assign, $1, $3); }
0419     | lhs '=' mrhs  { $$ = alloc_node(token_assign, $1, $3); }
0420     | mlhs '=' arg  { $$ = alloc_node(token_assign, $1, $3); }
0421     | mlhs '=' mrhs { $$ = alloc_node(token_assign, $1, $3); }
0422     | expr
0423     | tpEND { $$ = alloc_node(token__end__, NULL, NULL); }
0424 ;
0425
0426 expr: command_call /* expressions joined by the keyword operators and/or/not, or a plain arg */
0427     | expr tKWAND expr      { $$ = alloc_node(token_kw_and, $1, $3);   }
0428     | expr tKWOR expr       { $$ = alloc_node(token_kw_or, $1, $3);    }
0429     | tKWNOT opt_eol expr   { $$ = alloc_node(token_kw_not, $3, NULL); }
0430     | '!' command_call      { $$ = alloc_node(token_not, $2, NULL);    }
0431     | arg
0432 ;
0433
0434 command_call: command | block_command
0435 ;
0436
0437 block_command: block_call /* a block call, optionally followed by a chained call written without parentheses */
0438     | block_call '.' operation2 command_args
0439     {
0440         struct Node *aux = update_list($1, $3);
0441         $$ = alloc_node(token_method_call, aux, $4);
0442     } /* receiver and method name are chained with update_list(); the arguments become the right child */
0443     | block_call tCOLON2 operation2 command_args
0444     {
0445         struct Node *aux = update_list($1, $3);
0446         $$ = alloc_node(token_method_call, aux, $4);
0447     }
0448 ;
0449
0450 cmd_brace_block: tLBRACE_ARG opt_block_param compstmt '}' /* brace block attached to a command call */
0451     {
0452         $$ = ALLOC_N(token_block, $3, $2);
0453         pop_start(parser, $$);
0454     } /* ALLOC_N also runs pop_pos() on the new node before pop_start() is called */
0455 ;
0456
0457 command: operation command_args             %prec tLOWEST /* method calls written without parentheses, plus super/yield/return/break/next with arguments */
0458     {
0459         $$ = alloc_node(token_method_call, $1, $2);
0460     }
0461     | operation command_args cmd_brace_block
0462     {
0463         $$ = alloc_cond(token_method_call, $3, $1, $2);
0464     } /* with a block, the block is passed first to alloc_cond(), then name and arguments */
0465     | primary '.' operation2 command_args         %prec tLOWEST
0466     {
0467         struct Node *aux = update_list($1, $3);
0468         $$ = alloc_node(token_method_call, aux, $4);
0469     }
0470     | primary '.' operation2 command_args cmd_brace_block
0471     {
0472         struct Node *aux = update_list($1, $3);
0473         $$ = alloc_cond(token_method_call, $5, aux, $4);
0474     }
0475     | primary tCOLON2 operation2 command_args %prec tLOWEST
0476     {
0477         struct Node *aux = update_list($1, $3);
0478         $$ = alloc_node(token_method_call, aux, $4);
0479     }
0480     | primary tCOLON2 operation2 command_args cmd_brace_block
0481     {
0482         struct Node *aux = update_list($1, $3);
0483         $$ = alloc_cond(token_method_call, $5, aux, $4);
0484     }
0485     | tSUPER call_args  { $$ = alloc_node(token_method_call, $2, NULL); }
0486     | tYIELD call_args  { $$ = alloc_node(token_yield, $2, NULL);       }
0487     | tRETURN call_args { $$ = alloc_node(token_return, $2, NULL);      }
0488     | tBREAK call_args  { $$ = alloc_node(token_break, $2, NULL);       }
0489     | tNEXT call_args   { $$ = alloc_node(token_next, $2, NULL);        }
0490 ;
0491
0492 mlhs: mlhs_basic /* left-hand side of a multiple assignment */
0493     | tLPAREN mlhs_inner rparen { $$ = $2; } /* the parentheses add no node */
0494 ;
0495
0496 mlhs_inner: mlhs_basic
0497     | tLPAREN mlhs_inner rparen { $$ = $2; } /* nested parentheses collapse to the inner list */
0498 ;
0499
0500 mlhs_basic: mlhs_head /* a named splat target gets flags = kwrest; an anonymous splat becomes a fresh token_object node with flags = star */
0501     | mlhs_head mlhs_item { $$ = update_list($1, $2); }
0502     | mlhs_head tSTAR mlhs_node
0503     {
0504         $3->flags = kwrest;
0505         $$ = update_list($1, $3);
0506     }
0507     | mlhs_head tSTAR mlhs_node ',' mlhs_post
0508     {
0509         $3->flags = kwrest;
0510         $$ = concat_list($1, update_list($3, $5));
0511     }
0512     | mlhs_head tSTAR
0513     {
0514         $$ = alloc_node(token_object, NULL, NULL);
0515         $$->flags = star;
0516         $$ = update_list($1, $$);
0517     }
0518     | mlhs_head tSTAR ',' mlhs_post
0519     {
0520         $$ = alloc_node(token_object, NULL, NULL);
0521         $$->flags = star;
0522         $$ = update_list($1, $$);
0523         $$ = concat_list($$, $4);
0524     }
0525     | tSTAR mlhs_node               { $$ = $2; $$->flags = kwrest; }
0526     | tSTAR mlhs_node ',' mlhs_post { $$ = update_list($2, $4); $2->flags = kwrest; }
0527     | tSTAR
0528     {
0529         $$ = alloc_node(token_object, NULL, NULL);
0530         $$->flags = star;
0531     }
0532     | tSTAR ',' mlhs_post
0533     {
0534         $$ = alloc_node(token_object, NULL, NULL);
0535         $$->flags = star;
0536         $$ = update_list($$, $3);
0537     }
0538 ;
0539
0540 mlhs_item: mlhs_node
0541     | tLPAREN mlhs_inner rparen { $$ = alloc_node(token_object, $2, NULL); } /* a parenthesised group is wrapped in its own token_object node */
0542 ;
0543
0544 mlhs_head: mlhs_item ','        { $$ = $1; } /* one or more items, each followed by a comma */
0545     | mlhs_head mlhs_item ','   { $$ = update_list($1, $2); }
0546 ;
0547
0548 mlhs_post: mlhs_item            { $$ = $1; } /* comma-separated items that follow a splat */
0549     | mlhs_post ',' mlhs_item   { $$ = update_list($1, $3); }
0550 ;
0551
0552 mlhs_node: variable /* a single assignable target inside a multiple assignment */
0553     | primary '[' opt_call_args rbracket
0554     {
0555         $$ = alloc_node(token_array_value, $1, $3);
0556     }
0557     | primary '.' base          { $$ = alloc_node(token_method_call, $1, $3); }
0558     | primary tCOLON2 base      { $$ = alloc_node(token_method_call, $1, $3); }
0559     | primary '.' const         { $$ = alloc_node(token_method_call, $1, $3); }
0560     | primary tCOLON2 const
0561     {
0562         if (parser->in_def)
0563             yyerror(parser, "dynamic constant assignment");
0564         $$ = alloc_node(token_method_call, $1, $3);
0565     } /* the error is reported but the node is still built */
0566     | tCOLON3 const
0567     {
0568         if (parser->in_def)
0569             yyerror(parser, "dynamic constant assignment");
0570         $$ = $2;
0571     } /* the error is reported but the constant node is still returned */
0572     | backref
0573 ;
0574
0575 lhs: variable /* a single assignable target; same alternatives as mlhs_node except that backref is not accepted */
0576     | primary '[' opt_call_args rbracket
0577     {
0578         $$ = alloc_node(token_array_value, $1, $3);
0579     }
0580     | primary '.' base          { $$ = alloc_node(token_method_call, $1, $3); }
0581     | primary tCOLON2 base      { $$ = alloc_node(token_method_call, $1, $3); }
0582     | primary '.' const         { $$ = alloc_node(token_method_call, $1, $3); }
0583     | primary tCOLON2 const
0584     {
0585         if (parser->in_def)
0586             yyerror(parser, "dynamic constant assignment");
0587         $$ = alloc_node(token_method_call, $1, $3);
0588     } /* the error is reported but the node is still built */
0589     | tCOLON3 const
0590     {
0591         if (parser->in_def)
0592             yyerror(parser, "dynamic constant assignment");
0593         $$ = $2;
0594     } /* the error is reported but the constant node is still returned */
0595 ;
0596
0597 cname: BASE /* a class or module name; a BASE token here is reported as an error */
0598     {
0599       yyerror(parser, "class/module name must be CONSTANT");
0600       $$ = 0;
0601     }
0602     | const
0603 ;
0604
0605 cpath: tCOLON3 cname        { $$ = $2; } /* the leading tCOLON3 is not recorded in the tree */
0606     | cname                 { $$ = $1; }
0607     | primary tCOLON2 cname { $$ = update_list($1, $3); } /* scope and name are chained with update_list() */
0608 ;
0609
0610 fname: base /* a method name: identifier, constant, operator or reserved word */
0611     | const
0612     | op
0613     {
0614         lex_state = EXPR_ENDFN;
0615         $$ = alloc_node(token_object, NULL, NULL);
0616         $$->name = parser->aux;
0617         $$->pos.start_line = $$->pos.end_line = parser->line;
0618         $$->pos.end_col = parser->column;
0619         $$->pos.start_col = $$->pos.end_col - strlen(parser->aux);
0620     } /* the name is the string copy_op() left in parser->aux; the start column is the current column minus the operator length */
0621     | reswords
0622     {
0623         lex_state = EXPR_ENDFN;
0624         $$ = alloc_node(token_object, NULL, NULL);
0625     } /* this action assigns neither a name nor a position to the node */
0626 ;
0627
0628 fsym: fname | symbol
0629 ;
0630
0631 undef_list: fsym
0632     | undef_list ',' { lex_state = EXPR_FNAME; } fsym { $$ = update_list($1, $4); } /* the mid-rule action is component 3, so the new name is component 4 */
0633 ;
0634
0635 op: '|' { copy_op("|"); } | '^' { copy_op("^"); } | '&' { copy_op("&"); } /* every alternative stores the operator spelling in parser->aux through copy_op() */
0636     | tCMP { copy_op("<=>"); } | tEQ { copy_op("=="); } | tEQQ { copy_op("===");}
0637     | tMATCH { copy_op("=~"); } | tNMATCH {copy_op("!~");} | '>' { copy_op(">");}
0638     | tGEQ { copy_op(">="); } | '<' { copy_op("<"); } | tLEQ { copy_op("<="); }
0639     | tNEQ {copy_op("!=");} | tLSHIFT {copy_op("<<");} | tRSHIFT {copy_op(">>");}
0640     | '+' { copy_op("+"); } | '-' { copy_op("-"); } | '*' { copy_op("*"); }
0641     | tSTAR { copy_op("*"); } | '/' { copy_op("/"); } | '%' { copy_op("%"); }
0642     | tPOW { copy_op("**"); } | tAREF { copy_op("[]"); } | '`' { copy_op("`");}
0643     | tUPLUS { copy_op("+"); } | tASET { copy_op("[]="); }
0644     | tUMINUS { copy_op("-"); } | tDSTAR { copy_op("**"); }
0645     | '!' { copy_op("!"); } | '~' { copy_op("~"); }
0646 ;
0647
0648 reswords: tLINE | tFILE | tENCODING | upBEGIN | upEND | tALIAS | tKWAND /* reserved words accepted by fname; no alternative has an action */
0649     | tBEGIN | tBREAK | tCASE | tCLASS | tDEF | tDEFINED | tDO | tELSE | tELSIF
0650     | tEND | tENSURE | tFALSE | tFOR | tIN | tMODULE | tNEXT | tNIL | tKWNOT
0651     | tKWOR | tREDO | tRESCUE | tRETRY | tRETURN | tSELF | tSUPER | tTHEN | tTRUE
0652     | tUNDEF | tWHEN | tYIELD | tIF | tUNLESS | tWHILE | tUNTIL
0653 ;
0654
0655 arg: lhs '=' arg { $$ = alloc_node(token_assign, $1, $3); } /* assignments, operator expressions and the ternary; every tOP_ASGN form builds token_op_assign */
0656     | lhs '=' arg modifier_rescue arg
0657     {
0658         struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
0659         $$ = alloc_node(token_assign, $1, aux);
0660     } /* the rescue modifier wraps only the assigned value */
0661     | variable tOP_ASGN arg { $$ = alloc_node(token_op_assign, $1, $3); }
0662     | variable tOP_ASGN arg modifier_rescue arg
0663     {
0664         struct Node *aux = alloc_cond(token_rescue, $5, $3, NULL);
0665         $$ = alloc_node(token_op_assign, $1, aux);
0666     }
0667     | primary '[' opt_call_args rbracket tOP_ASGN arg
0668     {
0669         struct Node *aux = alloc_node(token_array_value, $1, $3);
0670         $$ = alloc_node(token_op_assign, aux, $6);
0671     }
0672     | primary '.' base tOP_ASGN arg
0673     {
0674         struct Node *aux = alloc_node(token_object, $1, $3);
0675         $$ = alloc_node(token_op_assign, aux, $5);
0676     }
0677     | primary '.' const tOP_ASGN arg
0678     {
0679         struct Node *aux = alloc_node(token_object, $1, $3);
0680         $$ = alloc_node(token_op_assign, aux, $5);
0681     }
0682     | primary tCOLON2 base tOP_ASGN arg
0683     {
0684         struct Node *aux = alloc_node(token_object, $1, $3);
0685         $$ = alloc_node(token_op_assign, aux, $5);
0686     }
0687     | primary tCOLON2 const tOP_ASGN arg
0688     {
0689         yyerror(parser, "constant re-assignment");
0690         $$ = NULL;
0691         DISPOSE3($1, $3, $5);
0692     } /* rejected: the sub-trees are freed here because no node takes ownership of them */
0693     | tCOLON3 const tOP_ASGN arg
0694     {
0695         yyerror(parser, "constant re-assignment");
0696         $$ = NULL;
0697         DISPOSE2($2, $4);
0698     } /* rejected: the sub-trees are freed here because no node takes ownership of them */
0699     | backref tOP_ASGN arg { $$ = alloc_node(token_op_assign, $1, $3); } /* same node kind as `backref tOP_ASGN command_call' in stmt */
0700     | arg tDOT2 arg { $$ = alloc_node(token_dot2, $1, $3); }
0701     | arg tDOT3 arg { $$ = alloc_node(token_dot3, $1, $3);}
0702     | arg '+' arg { $$ = alloc_node(token_plus, $1, $3); }
0703     | arg '-' arg { $$ = alloc_node(token_minus, $1, $3);}
0704     | arg '*' arg { $$ = alloc_node(token_mul, $1, $3);}
0705     | arg '/' arg { $$ = alloc_node(token_div, $1, $3);}
0706     | arg '%' arg { $$ = alloc_node(token_mod, $1, $3);}
0707     | arg tPOW arg { $$ = alloc_node(token_pow, $1, $3);}
0708     | tUMINUS_NUM simple_numeric tPOW arg
0709     {
0710         struct Node *aux = alloc_node(token_pow, $2, $4);
0711         $$ = alloc_node(token_unary_minus, aux, NULL);
0712     } /* the power is built first and the minus applied to its result */
0713     | tUPLUS arg    { $$ = alloc_node(token_unary_plus, $2, NULL);    }
0714     | tUMINUS arg { $$ = alloc_node(token_unary_minus, $2, NULL); }
0715     | arg '|' arg { $$ = alloc_node(token_bit_or, $1, $3);    }
0716     | arg '^' arg { $$ = alloc_node(token_bit_xor, $1, $3);    }
0717     | arg '&' arg { $$ = alloc_node(token_bit_and, $1, $3);    }
0718     | arg tCMP arg    { $$ = alloc_node(token_cmp, $1, $3);    }
0719     | arg '>' arg    { $$ = alloc_node(token_greater, $1, $3);    }
0720     | arg tGEQ arg    { $$ = alloc_node(token_geq, $1, $3);    }
0721     | arg '<' arg    { $$ = alloc_node(token_lesser, $1, $3);    }
0722     | arg tLEQ arg    { $$ = alloc_node(token_leq, $1, $3);    }
0723     | arg tEQ arg    { $$ = alloc_node(token_eq, $1, $3);    }
0724     | arg tEQQ arg    { $$ = alloc_node(token_eqq, $1, $3);    }
0725     | arg tNEQ arg    { $$ = alloc_node(token_neq, $1, $3);    }
0726     | arg tMATCH arg    { $$ = alloc_node(token_match, $1, $3); }
0727     | arg tNMATCH arg    { $$ = alloc_node(token_nmatch, $1, $3);    }
0728     | '!' arg    { $$ = alloc_node(token_not, $2, NULL);    }
0729     | '~' arg { $$ = alloc_node(token_neg, $2, NULL);    }
0730     | arg tLSHIFT arg { $$ = alloc_node(token_lshift, $1, $3); }
0731     | arg tRSHIFT arg { $$ = alloc_node(token_rshift, $1, $3); }
0732     | arg tAND arg { $$ = alloc_node(token_and, $1, $3); }
0733     | arg tOR arg { $$ = alloc_node(token_or, $1, $3); }
0734     | tDEFINED opt_eol arg { $$ = alloc_node(token_defined, $3, NULL); }
0735     | arg '?' arg opt_eol ':' arg
0736     {
0737         $$ = alloc_cond(token_ternary, $1, $3, $6);
0738     } /* condition, then-value and else-value, in that order */
0739     | primary
0740 ;
0741
0742 aref_args: none /* contents of an array literal: plain elements, optionally followed by hash-style associations */
0743     | args trailer              { $$ = $1; }
0744     | args ',' assocs trailer   { $$ = update_list($1, $3); } /* the associations are added to the element list as they are, without a wrapping token_hash node */
0745     | assocs trailer            { $$ = $1; }
0746 ;
0747
0748 paren_args: '(' opt_call_args rparen { $$ = $2; } /* the parentheses add no node */
0749 ;
0750
0751 opt_paren_args : none | paren_args
0752 ;
0753
0754 opt_call_args: none | call_args
0755 ;
0756
0757 call_args: command /* arguments of a call: positional args, trailing associations and an optional block argument */
0758     | args opt_block_arg { $$ = update_list($1, $2); }
0759     | assocs opt_block_arg
0760     {
0761         struct Node *aux = alloc_node(token_hash, $1, NULL);
0762         $$ = update_list(aux, $2);
0763     } /* bare associations are wrapped in one token_hash node */
0764     | args ',' assocs opt_block_arg
0765     {
0766         struct Node *aux = alloc_node(token_hash, $3, NULL);
0767         struct Node *n = update_list(aux, $4);
0768         $$ = concat_list($1, n);
0769     } /* positional args first, then the hash node, then the block argument */
0770     | block_arg
0771 ;
0772
0773 command_args: /* arguments of a call written without parentheses; cmdarg_stack is saved on entry and restored on exit */
0774     {
0775         $<num>$ = parser->cmdarg_stack; /* the value of this mid-rule action is the saved stack */
0776         CMDARG_PUSH(1);
0777     } call_args
0778     {
0779         parser->cmdarg_stack = $<num>1; /* restore from the mid-rule action, not from this rule's own, still unassigned, result */
0780         $$ = $2;
0781     }
0782 ;
0783
0784 block_arg: tAMPER arg { $$ = $2; } /* the tAMPER marker itself is not recorded on the node */
0785 ;
0786
0787 opt_block_arg: ',' block_arg { $$ = $2; }
0788     | ',' { $$ = NULL; } /* a trailing comma with nothing after it */
0789     | none
0790 ;
0791
0792 args: arg /* comma-separated argument list; elements may be prefixed by tSTAR */
0793     | tSTAR arg             { $$ = $2; } /* the splat marker itself is not recorded on the node */
0794     | args ',' arg          { $$ = update_list($1, $3); }
0795     | args ',' tSTAR arg    { $$ = update_list($1, $4); }
0796 ;
0797
0798 mrhs: args ',' arg          { $$ = update_list($1, $3); } /* right-hand side of a multiple assignment: at least two values, or a single splat */
0799     | args ',' tSTAR arg    { $$ = update_list($1, $4); }
0800     | tSTAR arg             { $$ = $2; }
0801 ;
0802
0803 primary: literal
0804     | strings
0805     | variable
0806     | backref
0807     | tBEGIN bodystmt tEND      { $$ = alloc_node(token_begin, $2, NULL); }
0808     | tLPAREN_ARG expr { lex_state = EXPR_ENDARG; } rparen   { $$ = $2; }
0809     | tLPAREN compstmt ')'      { $$ = $2; }
0810     | primary tCOLON2 const
0811     {
0812         struct Node *aux = update_list($1, $3);
0813         $$ = alloc_node(token_method_call, aux, NULL);
0814     }
0815     | tCOLON3 const             { $$ = $2; }
0816     | ARRAY                     { $$ = alloc_node(token_array, NULL, NULL); }
0817     | tLBRACKET aref_args ']'   { $$ = alloc_node(token_array, $2, NULL); }
0818     | tLBRACE assoc_list '}'
0819     {
0820         $$ = alloc_node(token_hash, $2, NULL);
0821         discard_pos();
0822     }
0823     | tRETURN                   { $$ = alloc_node(token_return, NULL, NULL); }
0824     | tYIELD '(' call_args rparen { $$ = alloc_node(token_yield, $3, NULL); }
0825     | tYIELD '(' rparen         { $$ = alloc_node(token_yield, NULL, NULL); }
0826     | tYIELD                    { $$ = alloc_node(token_yield, NULL, NULL); }
0827     | tDEFINED opt_eol '(' expr rparen
0828     {
0829         $$ = alloc_node(token_defined, $4, NULL);
0830     }
0831     | tKWNOT '(' expr rparen    { $$ = alloc_node(token_kw_not, $3, NULL); }
0832     | tKWNOT '(' rparen         { $$ = alloc_node(token_kw_not, NULL, NULL); }
0833     | operation brace_block     { $$ = alloc_cond(token_method_call, $2, $1, NULL); }
0834     | method_call opt_brace_block
0835     {
0836         $$ = $1;
0837         $$->cond = $2;
0838     }
0839     | tLAMBDA lambda    { $$ = alloc_cond(token_method_call, $2, NULL, NULL); }
0840     | tIF expr then compstmt if_tail tEND
0841     {
0842         $$ = alloc_cond(token_if, $2, $4, $5);
0843         discard_pos(); /* tEND */
0844     }
0845     | tUNLESS expr then compstmt opt_else tEND
0846     {
0847         $$ = alloc_cond(token_unless, $2, $4, $5);
0848         discard_pos(); /* tEND */
0849     }
0850     | tWHILE { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
0851     {
0852         $$ = alloc_cond(token_while, $3, $6, NULL);
0853         discard_pos(); /* tEND */
0854     }
0855     | tUNTIL { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
0856     {
0857         $$ = alloc_cond(token_while, $3, $6, NULL);
0858         discard_pos(); /* tEND */
0859     }
0860     | tCASE expr opt_terms case_body tEND
0861     {
0862         $$ = alloc_cond(token_case, $2, $4, NULL);
0863         discard_pos(); /* tEND */
0864     }
0865     | tCASE opt_terms case_body tEND
0866     {
0867         $$ = alloc_node(token_case, $3, NULL);
0868         discard_pos(); /* tEND */
0869     }
0870     | tFOR for_var tIN { COND_PUSH(1); } expr do { COND_POP(); } compstmt tEND
0871     {
0872         $$ = alloc_cond(token_for, $5, $8, $2);
0873         discard_pos(); /* tEND */
0874     }
0875     | tCLASS cpath superclass
0876     {
0877         if (parser->in_def)
0878             yyerror(parser, "class definition in method body");
0879     }
0880     bodystmt tEND
0881     {
0882         $$ = alloc_cond(token_class, $3, $5, $2);
0883         pop_comment(parser, $$);
0884     }
0885     | tCLASS opt_terms tLSHIFT expr term bodystmt tEND
0886     {
0887         $$ = alloc_node(token_singleton_class, $6, $4);
0888         pop_comment(parser, $$);
0889     }
0890     | tMODULE cpath
0891     {
0892         if (parser->in_def)
0893             yyerror(parser, "module definition in method body");
0894     }
0895     bodystmt tEND
0896     {
0897         $$ = alloc_node(token_module, $4, $2);
0898         pop_comment(parser, $$);
0899     }
0900     | tDEF fname
0901     {
0902         parser->in_def++;
0903     }
0904     f_arglist bodystmt tEND
0905     {
0906         parser->in_def--;
0907         $$ = alloc_cond(token_function, $2, $5, $4);
0908         pop_comment(parser, $$);
0909     }
0910     | tDEF singleton dot_or_colon { lex_state = EXPR_FNAME; } fname
0911     {
0912         lex_state = EXPR_ENDFN;
0913         parser->in_def++;
0914     }
0915     f_arglist bodystmt tEND
0916     {
0917         $$ = alloc_node(token_object, $2, $5);
0918         $$ = alloc_cond(token_function, $$, $8, $7);
0919         $$->flags = 1; /* Class method */
0920         pop_comment(parser, $$);
0921         parser->in_def--;
0922     }
0923     | tBREAK    { $$ = alloc_node(token_break, NULL, NULL);    }
0924     | tNEXT     { $$ = alloc_node(token_next, NULL, NULL);     }
0925     | tREDO     { $$ = alloc_node(token_redo, NULL, NULL);     }
0926     | tRETRY    { $$ = alloc_node(token_retry, NULL, NULL);    }
0927 ;
0928
/* Separator after a condition: a terminator, tTHEN, or a terminator
 * followed by tTHEN. */
then
    : term
    | tTHEN
    | term tTHEN
    ;

/* Separator after a loop condition: a terminator or tDO_COND. */
do
    : term
    | tDO_COND
    ;
0936
/* What may follow the body of an `if`: an optional else, or an elsif
 * chain that is itself represented as a nested token_if node. */
if_tail
    : opt_else
    | tELSIF expr then compstmt if_tail
    {
        $$ = alloc_cond(token_if, $2, $4, $5);
    }
    ;

/* Optional else branch; when present it is wrapped in a token_if node
 * whose left child is the branch body. */
opt_else
    : none
    | tELSE compstmt
    {
        $$ = alloc_node(token_if, $2, NULL);
    }
    ;

/* Iteration variable(s) of a for loop. */
for_var
    : lhs
    | mlhs
    ;
0950
/* One element of a parenthesized (destructuring) parameter list. */
f_marg
    : f_norm_arg
    {
        $$ = $1;
    }
    | tLPAREN f_margs rparen
    {
        $$ = $2;
    }
    ;

/* Comma-separated list of f_marg elements. */
f_marg_list
    : f_marg
    | f_marg_list ',' f_marg
    {
        $$ = update_list($1, $3);
    }
    ;

/* Contents of a destructuring parameter. A lone tSTAR (splat without a
 * name) is represented by a fresh, empty token_object node. */
f_margs
    : f_marg_list
    {
        $$ = $1;
    }
    | f_marg_list ',' tSTAR f_norm_arg
    {
        $$ = update_list($1, $4);
    }
    | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list
    {
        $$ = concat_list($1, update_list($4, $6));
    }
    | f_marg_list ',' tSTAR
    {
        struct Node *anon = alloc_node(token_object, NULL, NULL);
        $$ = update_list($1, anon);
    }
    | f_marg_list ',' tSTAR ',' f_marg_list
    {
        struct Node *anon = alloc_node(token_object, NULL, NULL);
        $$ = concat_list($1, update_list(anon, $5));
    }
    | tSTAR f_norm_arg
    {
        $$ = $2;
    }
    | tSTAR f_norm_arg ',' f_marg_list
    {
        $$ = update_list($2, $4);
    }
    | tSTAR
    {
        $$ = alloc_node(token_object, NULL, NULL);
    }
    | tSTAR ',' f_marg_list
    {
        struct Node *anon = alloc_node(token_object, NULL, NULL);
        $$ = update_list(anon, $3);
    }
    ;
0984
/* Trailing part of a block parameter list: keyword arguments, a keyword
 * rest argument and/or a block argument. */
block_args_tail
    : f_block_kwarg ',' f_kwrest opt_f_block_arg
    {
        struct Node *rest = update_list($3, $4);
        $$ = concat_list($1, rest);
    }
    | f_block_kwarg opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_kwrest opt_f_block_arg
    {
        $$ = update_list($1, $2);
    }
    | f_block_arg
    {
        $$ = $1;
    }
    ;

/* The tail above, introduced by a comma, or nothing at all. */
opt_block_args_tail
    : ',' block_args_tail
    {
        $$ = $2;
    }
    | /* none */
    {
        $$ = 0;
    }
    ;
1003
/* Every accepted combination of required, optional, rest and trailing
 * parameters of a block. Each action chains the pieces together, in
 * source order, with update_list / concat_list / create_list. */
block_param
    : f_arg ',' f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        struct Node *rest = update_list($5, $6);
        $$ = concat_list($1, concat_list($3, rest));
    }
    | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($7, $8);
        $$ = concat_list($1, concat_list($3, create_list($5, post)));
    }
    | f_arg ',' f_block_optarg opt_block_args_tail
    {
        struct Node *optional = update_list($3, $4);
        $$ = concat_list($1, optional);
    }
    | f_arg ',' f_block_optarg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($5, $6);
        $$ = concat_list($1, concat_list($3, post));
    }
    | f_arg ',' f_rest_arg opt_block_args_tail
    {
        struct Node *rest = update_list($3, $4);
        $$ = update_list($1, rest);
    }
    | f_arg ','
    {
        $$ = $1;
    }
    | f_arg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($5, $6);
        $$ = concat_list($1, concat_list($3, post));
    }
    | f_arg opt_block_args_tail
    {
        $$ = update_list($1, $2);
    }
    | f_block_optarg ',' f_rest_arg opt_block_args_tail
    {
        struct Node *rest = update_list($3, $4);
        $$ = concat_list($1, rest);
    }
    | f_block_optarg ',' f_rest_arg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($5, $6);
        $$ = concat_list($1, create_list($3, post));
    }
    | f_block_optarg opt_block_args_tail
    {
        $$ = update_list($1, $2);
    }
    | f_block_optarg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($3, $4);
        $$ = concat_list($1, post);
    }
    | f_rest_arg opt_block_args_tail
    {
        $$ = update_list($1, $2);
    }
    | f_rest_arg ',' f_arg opt_block_args_tail
    {
        struct Node *post = update_list($3, $4);
        $$ = create_list($1, post);
    }
    | block_args_tail
    ;
1050
/* Optional `|...|` parameter section of a block. When one is present,
 * command_start is set to 1 once it has been parsed. */
opt_block_param
    : none
    | block_param_def
    {
        command_start = 1;
        $$ = $1;
    }
    ;

/* The parameter section itself: `| [; locals] |`, the tOR token (which
 * yields no parameters), or `| params [; locals] |`. */
block_param_def
    : '|' opt_bv_decl '|'
    {
        $$ = $2;
    }
    | tOR
    {
        $$ = NULL;
    }
    | '|' block_param opt_bv_decl '|'
    {
        $$ = update_list($2, $3);
    }
    ;
1063
/* Optional `; a, b` list of block-local variables. A warning is emitted
 * when the configured Ruby version is older than ruby19. */
opt_bv_decl
    : none
    | ';' bv_decls
    {
        if (parser->version < ruby19) {
            yywarning("Block local variables are only available in Ruby 1.9.x or higher.");
        }
        $$ = $2;
    }
    ;

/* Comma-separated list of block-local variables. */
bv_decls
    : bvar
    | bv_decls ',' bvar
    {
        $$ = update_list($1, $3);
    }
    ;

/* A single block-local variable. An invalid name produces no node: the
 * value coming from f_bad_arg is handed to free_ast and NULL is yielded. */
bvar
    : base
    | f_bad_arg
    {
        free_ast($1);
        $$ = NULL;
    }
    ;
1085
/* Lambda literal: parameter list followed by a body. The mid-rule action
 * stashes the current lpar_beg as its own semantic value and replaces it
 * with the incremented parenthesis nesting level; the final action puts
 * the stashed value back. */
lambda
    :
    {
        $<num>$ = parser->lpar_beg;
        parser->lpar_beg = ++parser->paren_nest;
    }
    f_larglist lambda_body
    {
        parser->lpar_beg = $<num>1;
        $$ = alloc_node(token_block, $3, $2);
    }
    ;

/* Lambda parameters, either parenthesized (optionally followed by
 * `; locals`) or bare. */
f_larglist
    : '(' f_args opt_bv_decl rparen
    {
        $$ = update_list($2, $3);
    }
    | f_args
    ;

/* Lambda body: `{ ... }` or `do ... end`. One discard_pos() call is made
 * per delimiter noted in the trailing comments. */
lambda_body
    : tLAMBEG compstmt '}'
    {
        $$ = $2;
        discard_pos(); /* } */
        discard_pos(); /* { */
    }
    | tDO_LAMBDA compstmt tEND
    {
        $$ = $2;
        discard_pos(); /* end */
    }
    ;
1114
/* `do |params| ... end` block attached to a command: a token_block node
 * with the body on the left and the parameters on the right. */
do_block
    : tDO_BLOCK opt_block_param compstmt tEND
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
    ;

/* A command with a do-block, optionally followed by chained calls through
 * '.' or tCOLON2. The block is stored in the command's `cond` field. */
block_call
    : command do_block
    {
        $$ = $1;
        $$->cond = $2;
    }
    | block_call '.' operation2 opt_paren_args
    {
        struct Node *chain = update_list($1, $3);
        $$ = update_list(chain, $4);
    }
    | block_call tCOLON2 operation2 opt_paren_args
    {
        struct Node *chain = update_list($1, $3);
        $$ = update_list(chain, $4);
    }
    ;
1134
/* Method invocations. For token_method_call nodes the left child is the
 * receiver/name chain and the right child the argument list (NULL when
 * there is none). Indexing with [] yields a token_array_value node. */
method_call
    : operation paren_args
    {
        $$ = alloc_node(token_method_call, $1, $2);
    }
    | primary '.' operation2 opt_paren_args
    {
        struct Node *callee = update_list($1, $3);
        $$ = alloc_node(token_method_call, callee, $4);
    }
    | primary tCOLON2 operation2 paren_args
    {
        struct Node *callee = update_list($1, $3);
        $$ = alloc_node(token_method_call, callee, $4);
    }
    | primary tCOLON2 operation3
    {
        struct Node *callee = update_list($1, $3);
        $$ = alloc_node(token_method_call, callee, NULL);
    }
    | primary '.' paren_args
    {
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | primary tCOLON2 paren_args
    {
        $$ = alloc_node(token_method_call, $1, $3);
    }
    | super paren_args
    {
        $1->r = $2;
        $$ = $1;
    }
    | super
    | primary '[' opt_call_args rbracket
    {
        $$ = alloc_node(token_array_value, $1, $3);
    }
    ;
1169
/* A brace block, or nothing. */
opt_brace_block
    : none
    | brace_block
    ;

/* Block written as `{ |params| ... }` or `do |params| ... end`. Both
 * forms build a token_block node (body left, parameters right) and then
 * call pop_start() on it. */
brace_block
    : '{' opt_block_param compstmt '}'
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
    | tDO opt_block_param compstmt tEND
    {
        $$ = ALLOC_N(token_block, $3, $2);
        pop_start(parser, $$);
    }
    ;
1185
/* One `when` clause: matched values, body, and whatever follows. */
case_body
    : tWHEN args then compstmt cases
    {
        $$ = alloc_cond(token_when, $2, $4, $5);
    }
    ;

/* Continuation of a case statement: an optional else or another when. */
cases
    : opt_else
    | case_body
    ;
1194
/* Zero or more rescue clauses. Each clause is a token_rescue node; the
 * following clause (if any) is stored in its `ensure` field. */
opt_rescue
    : tRESCUE rescue_arg then compstmt opt_rescue
    {
        $$ = alloc_node(token_rescue, $2, $4);
        $$->ensure = $5;
    }
    | none
    ;

/* Exception list and/or variable of a rescue clause. No node is built
 * when both parts are missing. */
rescue_arg
    : exc_list exc_var
    {
        if ($1 || $2)
            $$ = alloc_node(token_rescue_arg, $1, $2);
        else
            $$ = NULL;
    }
    ;

/* Exceptions handled by a rescue clause. */
exc_list
    : arg
    | mrhs
    | none
    ;

/* Optional `=> var` part of a rescue clause. */
exc_var
    : none
    | tASSOC lhs
    {
        $$ = $2;
    }
    ;

/* Optional ensure clause. */
opt_ensure
    : none
    | tENSURE compstmt
    {
        $$ = alloc_node(token_ensure, $2, NULL);
    }
    ;
1218
/* Numeric or symbol literal. */
literal
    : numeric
    | symbol
    ;

/* One or more adjacent string literals. */
strings
    : string
    {
        $$ = $1;
    }
    | strings string
    {
        $$ = update_list($1, $2);
    }
    ;

/* A character literal or a delimited string. For the delimited form the
 * node kind is taken from the active lex_strterm, which is then released
 * (its word first) and reset to NULL. */
string
    : tCHAR
    {
        $$ = alloc_node(token_string, NULL, NULL);
    }
    | tSTRING_BEG string_contents tSTRING_END
    {
        $$ = alloc_node(lex_strterm->token, $2, NULL);
        if (lex_strterm->word != NULL) {
            free(lex_strterm->word);
            lex_strterm->word = NULL;
        }
        free(lex_strterm);
        lex_strterm = NULL;
    }
    ;

/* Pieces found inside a string. update_list is only called once a first
 * non-NULL piece has been collected. */
string_contents
    : /* none */
    {
        $$ = 0;
    }
    | string_contents string_content
    {
        if ($1 == NULL)
            $$ = $2;
        else
            $$ = update_list($1, $2);
    }
    ;
1251
/* One piece of a string: plain content (no node), an embedded statement
 * opened by tSTRING_DBEG, or an embedded variable opened by tSTRING_DVAR.
 * For the embedded forms the mid-rule actions save parser state as their
 * own semantic values (cond_stack as $2 and lex_strterm as $3 for
 * tSTRING_DBEG; lex_strterm as $2 for tSTRING_DVAR) and the closing action
 * restores it. */
string_content
    : tSTRING_CONTENT
    {
        $$ = 0;
    }
    | tSTRING_DBEG
    {
        lex_state = EXPR_BEG;
        $<num>$ = parser->cond_stack;
    }
    {
        $<term>$ = lex_strterm;
        lex_strterm = NULL;
    }
    compstmt '}'
    {
        parser->cond_stack = $<num>2;
        lex_strterm = $<term>3;
        $$ = $4;
        discard_pos(); /* } */
    }
    | tSTRING_DVAR
    {
        $<term>$ = lex_strterm;
        lex_strterm = NULL;
        lex_state = EXPR_BEG;
    }
    string_dvar
    {
        lex_strterm = $<term>2;
        $$ = $3;
    }
    ;

/* Variable embedded in a string; each kind of variable is tagged through
 * the node's flags. */
string_dvar
    : backref
    | GLOBAL
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = global;
        POP_STACK;
    }
    | IVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = ivar;
        POP_STACK;
    }
    | CVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = cvar;
        POP_STACK;
    }
    ;
1287
/* Symbol literal: the node built for `sym` is re-tagged as token_symbol
 * and its starting column is moved one position to the left. */
symbol
    : tSYMBEG sym
    {
        $$ = $2;
        $$->kind = token_symbol;
        $$->pos.start_col--;
    }
    ;

/* What may follow tSYMBEG. */
sym
    : fname
    | strings
    | GLOBAL
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = global;
        POP_STACK;
    }
    | IVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = ivar;
        POP_STACK;
    }
    | CVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = cvar;
        POP_STACK;
    }
    ;
1302
/* Numeric literal, optionally wrapped in a token_unary_minus node. */
numeric
    : simple_numeric
    | tUMINUS_NUM simple_numeric   %prec tLOWEST
    {
        $$ = alloc_node(token_unary_minus, $2, NULL);
    }
    ;

/* Unsigned numeric literal; the literal kind is recorded in the node's
 * flags. Rational and imaginary literals trigger a warning when the
 * configured Ruby version is older than ruby21. */
simple_numeric
    : tINTEGER
    {
        $$ = alloc_node(token_numeric, NULL, NULL);
        $$->flags = int_l;
    }
    | tFLOAT
    {
        $$ = alloc_node(token_numeric, NULL, NULL);
        $$->flags = float_l;
    }
    | tRATIONAL
    {
        if (parser->version < ruby21) {
            yywarning("Rational literals are only available in Ruby 2.1.x or higher.");
        }
        $$ = alloc_node(token_numeric, NULL, NULL);
        $$->flags = rational_l;
    }
    | tIMAGINARY
    {
        if (parser->version < ruby21) {
            yywarning("Imaginary literals are only available in Ruby 2.1.x or higher.");
        }
        $$ = alloc_node(token_numeric, NULL, NULL);
        $$->flags = imaginary_l;
    }
    ;
1327
/* Anything usable as a variable reference. */
variable
    : base
    | GLOBAL
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = global;
        POP_STACK;
    }
    | IVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = ivar;
        POP_STACK;
    }
    | CVAR
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = cvar;
        POP_STACK;
    }
    | const
    | other_vars
    ;

/* Keyword pseudo-variables; each one gets a node of its own kind. */
other_vars
    : tNIL
    {
        $$ = alloc_node(token_nil, NULL, NULL);
    }
    | tSELF
    {
        $$ = alloc_node(token_self, NULL, NULL);
    }
    | tTRUE
    {
        $$ = alloc_node(token_true, NULL, NULL);
    }
    | tFALSE
    {
        $$ = alloc_node(token_false, NULL, NULL);
    }
    | tFILE
    {
        $$ = alloc_node(token_file, NULL, NULL);
    }
    | tLINE
    {
        $$ = alloc_node(token_line, NULL, NULL);
    }
    | tENCODING
    {
        $$ = alloc_node(token_encoding, NULL, NULL);
    }
    ;

/* Back-reference token (tNTH_REF). */
backref
    : tNTH_REF
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        POP_STACK;
    }
    ;
1347
/* Optional `< expr` part of a class definition. A syntax error up to the
 * next terminator is recovered from and yields no superclass. */
superclass
    : term
    {
        $$ = NULL;
    }
    | '<'
    {
        lex_state = EXPR_BEG;
        command_start = 1;
    }
    expr term
    {
        $$ = $3;
    }
    | error term
    {
        yyerrok;
        $$ = NULL;
    }
    ;

/* Parameter list of a method definition, with or without parentheses.
 * Afterwards the lexer is put back in EXPR_BEG with command_start set. */
f_arglist
    : '(' f_args rparen
    {
        $$ = $2;
        lex_state = EXPR_BEG;
        command_start = 1;
    }
    | f_args term
    {
        $$ = $1;
        lex_state = EXPR_BEG;
        command_start = 1;
    }
    ;
1374
/* Trailing part of a method parameter list: keyword arguments, a keyword
 * rest argument and/or a block argument. The alternatives involving
 * keyword arguments warn when the configured Ruby version is older than
 * ruby20. */
args_tail
    : f_kwarg ',' f_kwrest opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = concat_list($1, update_list($3, $4));
    }
    | f_kwarg opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = update_list($1, $2);
    }
    | f_kwrest opt_f_block_arg
    {
        if (parser->version < ruby20) {
            yywarning("Keyword arguments are only available in Ruby 2.0.x or higher.");
        }
        $$ = update_list($1, $2);
    }
    | f_block_arg
    {
        $$ = $1;
    }
    ;

/* The tail above, introduced by a comma, or nothing at all. */
opt_args_tail
    : ',' args_tail
    {
        $$ = $2;
    }
    | /* none */
    {
        $$ = 0;
    }
    ;
1405
/* Every accepted combination of required, optional, rest and trailing
 * parameters of a method. Each action chains the pieces together, in
 * source order, with concat_list / create_list. */
f_args
    : f_arg ',' f_optarg ',' f_rest_arg opt_args_tail
    {
        struct Node *rest = concat_list($5, $6);
        $$ = concat_list($1, concat_list($3, rest));
    }
    | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($7, $8);
        $$ = concat_list($1, concat_list($3, create_list($5, post)));
    }
    | f_arg ',' f_optarg opt_args_tail
    {
        struct Node *optional = concat_list($3, $4);
        $$ = concat_list($1, optional);
    }
    | f_arg ',' f_optarg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($5, $6);
        $$ = concat_list($1, concat_list($3, post));
    }
    | f_arg ',' f_rest_arg opt_args_tail
    {
        struct Node *rest = concat_list($3, $4);
        $$ = concat_list($1, rest);
    }
    | f_arg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($5, $6);
        $$ = concat_list($1, concat_list($3, post));
    }
    | f_arg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_optarg ',' f_rest_arg opt_args_tail
    {
        struct Node *rest = concat_list($3, $4);
        $$ = concat_list($1, rest);
    }
    | f_optarg ',' f_rest_arg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($5, $6);
        $$ = concat_list($1, create_list($3, post));
    }
    | f_optarg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_optarg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($3, $4);
        $$ = concat_list($1, post);
    }
    | f_rest_arg opt_args_tail
    {
        $$ = concat_list($1, $2);
    }
    | f_rest_arg ',' f_arg opt_args_tail
    {
        struct Node *post = concat_list($3, $4);
        $$ = create_list($1, post);
    }
    | args_tail
    | none
    ;
1461
/* Names that cannot be used as a formal argument. Each one reports an
 * error and yields no node. */
f_bad_arg
    : CONST
    {
        yyerror(parser, "formal argument cannot be a constant");
        $$ = 0;
    }
    | IVAR
    {
        yyerror(parser, "formal argument cannot be an instance variable");
        $$ = 0;
    }
    | GLOBAL
    {
        yyerror(parser, "formal argument cannot be a global variable");
        $$ = 0;
    }
    | CVAR
    {
        yyerror(parser, "formal argument cannot be a class variable");
        $$ = 0;
    }
    ;

/* A plain formal argument (valid or not). */
f_norm_arg
    : f_bad_arg
    | base
    ;

/* A formal argument or a parenthesized destructuring group. */
f_arg_item
    : f_norm_arg
    | tLPAREN f_margs rparen
    {
        $$ = $2;
    }
    ;

/* Comma-separated list of required arguments. */
f_arg
    : f_arg_item
    | f_arg ',' f_arg_item
    {
        $$ = concat_list($1, $3);
    }
    ;
1478
/* Keyword argument of a method: `label arg`, flagged as label. */
f_kw
    : label arg
    {
        $$ = alloc_node(token_object, $1, $2);
        $$->flags = label;
    }
    ;

/* Keyword argument of a block: `label primary`, flagged as label. */
f_block_kw
    : label primary
    {
        $$ = alloc_node(token_object, $1, $2);
        $$->flags = label;
    }
    ;

/* Comma-separated keyword arguments of a block. */
f_block_kwarg
    : f_block_kw
    {
        $$ = $1;
    }
    | f_block_kwarg ',' f_block_kw
    {
        $$ = update_list($1, $3);
    }
    ;

/* Comma-separated keyword arguments of a method. */
f_kwarg
    : f_kw
    {
        $$ = $1;
    }
    | f_kwarg ',' f_kw
    {
        $$ = update_list($1, $3);
    }
    ;

/* Token introducing a keyword rest argument. */
kwrest_mark
    : tPOW
    | tDSTAR
    ;

/* Keyword rest argument, named or anonymous; flagged as kwrest. The
 * anonymous form gets a fresh, empty token_object node. */
f_kwrest
    : kwrest_mark base
    {
        $$ = $2;
        $$->flags = kwrest;
    }
    | kwrest_mark
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = kwrest;
    }
    ;
1515
/* Optional argument of a method: `name = default`. The mid-rule action
 * records the column right after the '='; the final action uses it, with
 * the current column and lexer offset, to overwrite the position of the
 * default-value node. The name node is flagged as opt. */
f_opt
    : base '='
    {
        $<num>$ = parser->column;
    }
    arg
    {
        $$ = alloc_node(token_assign, $1, $4);
        $1->flags = opt; /* TODO: not sure about this */
        $4->pos.start_col = $<num>3;
        $4->pos.end_col = parser->column;
        $4->pos.offset = parser->lex_prev - parser->blob;
    }
    ;

/* Optional argument of a block: `name = primary`. */
f_block_opt
    : base '=' primary
    {
        $$ = alloc_node(token_assign, $1, $3);
    }
    ;

/* Comma-separated optional arguments of a block. */
f_block_optarg
    : f_block_opt
    | f_block_optarg ',' f_block_opt
    {
        $$ = update_list($1, $3);
    }
    ;

/* Comma-separated optional arguments of a method. */
f_optarg
    : f_opt
    | f_optarg ',' f_opt
    {
        $$ = update_list($1, $3);
    }
    ;

/* Token introducing a rest argument. */
restarg_mark
    : '*'
    | tSTAR
    ;

/* Rest argument, named or anonymous. Both forms set the node's flags to
 * kwrest; the anonymous form gets a fresh, empty token_object node. */
f_rest_arg
    : restarg_mark base
    {
        $$ = $2;
        $$->flags = kwrest;
    }
    | restarg_mark
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->flags = kwrest;
    }
    ;

/* Token introducing a block argument. */
blkarg_mark
    : '&'
    | tAMPER
    ;

/* Block argument; the name node is flagged as block. */
f_block_arg
    : blkarg_mark base
    {
        $$ = $2;
        $$->flags = block;
    }
    ;

/* Block argument introduced by a comma, or nothing. */
opt_f_block_arg
    : ',' f_block_arg
    {
        $$ = $2;
    }
    | none
    ;
1557
/* Receiver of a singleton method definition: a variable or a
 * parenthesized expression. An empty expression or one of the literal
 * kinds listed below is reported as an error; the expression node is
 * yielded either way. */
singleton
    : variable
    {
        $$ = $1;
    }
    | '('
    {
        lex_state = EXPR_BEG;
    }
    expr rparen
    {
        if ($3 == 0) {
            yyerror(parser, "can't define singleton method for ().");
        } else {
            switch ($3->kind) {
                case token_string:
                case token_regexp:
                case token_numeric:
                case token_symbol:
                case token_array:
                    yyerror(parser, "can't define singleton method for literals");
                    break;
                default:
                    break;
            }
        }
        $$ = $3;
    }
    ;

/* Constant name; the node is flagged as constant. */
const
    : CONST
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = constant;
        POP_STACK;
    }
    ;

/* Plain identifier; the node is flagged as var. */
base
    : BASE
    {
        $$ = ALLOC_N(token_object, NULL, NULL);
        $$->flags = var;
        POP_STACK;
    }
    ;
1582
/* Contents of a hash literal: nothing, or associations plus a trailer. */
assoc_list
    : none
    | assocs trailer
    {
        $$ = $1;
    }
    ;

/* Comma-separated associations. */
assocs
    : assoc
    | assocs ',' assoc
    {
        $$ = update_list($1, $3);
    }
    ;

/* One association: `key => value`, `label value`, or a tDSTAR-prefixed
 * argument. The last two warn when the configured Ruby version is older
 * than ruby19 and ruby20 respectively. */
assoc
    : arg tASSOC arg
    {
        $$ = alloc_node(token_object, $1, $3);
    }
    | label arg
    {
        if (parser->version < ruby19) {
            yywarning("This syntax is only available in Ruby 1.9.x or higher.");
        }
        $$ = alloc_node(token_object, $1, $2);
    }
    | tDSTAR arg
    {
        if (parser->version < ruby20) {
            yywarning("tDSTAR token is only available in Ruby 2.0.x or higher.");
        }
        $$ = $2;
    }
    ;
1610
/* Method name written as an identifier or a constant. */
operation
    : base
    | const
    ;

/* Method name after '.' or tCOLON2: identifier, constant or operator.
 * For an operator the node name is taken from parser->aux. */
operation2
    : base
    | const
    | op
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->name = parser->aux;
    }
    ;

/* Same as operation2 but without the constant alternative. */
operation3
    : base
    | op
    {
        $$ = alloc_node(token_object, NULL, NULL);
        $$->name = parser->aux;
    }
    ;

/* Label token (tKEY), represented as a token_symbol node. */
label
    : tKEY
    {
        $$ = ALLOC_N(token_symbol, NULL, NULL);
        POP_STACK;
    }
    ;

/* The `super` keyword. */
super
    : tSUPER
    {
        $$ = alloc_node(token_super, NULL, NULL);
    }
    ;

/* Separator between a singleton receiver and the method name. */
dot_or_colon
    : '.'
    | tCOLON2
    ;
1639
/* Zero or more statement terminators. */
opt_terms
    : /* none */
    | terms
    ;

/* Optional newline. */
opt_eol
    : /* none */
    | '\n'
    ;

/* Closing parenthesis, possibly preceded by a newline. */
rparen
    : opt_eol ')'
    ;

/* Closing bracket, possibly preceded by a newline. */
rbracket
    : opt_eol ']'
    ;

/* What may follow the last element of a list: a newline, a comma, or
 * nothing. */
trailer
    : opt_eol
    | ','
    ;

/* Statement terminator. A ';' also runs yyerrok. */
term
    : ';'
    {
        yyerrok;
    }
    | '\n'
    ;

/* One terminator followed by any number of ';'. */
terms
    : term
    | terms ';'
    {
        yyerrok;
    }
    ;

/* Empty production; yields NULL. */
none
    : /* none */
    {
        $$ = NULL;
    }
    ;
1663
1664 %%
1665 #undef parser
1666 #undef yylex
1667
1668 #include <ctype.h>
1669 #include "hash.c"
1670
1671
/*
 * Helper macros used by the lexer.
 *
 * Every macro parameter is parenthesized in the expansion so that callers
 * may pass arbitrary expressions (for instance `ptr + 1`) without being
 * bitten by operator precedence.
 *
 * NOTE: _unused_ keeps its trailing semicolon and SWAP keeps its plain
 * brace form because existing call sites may rely on either.
 */

#define _unused_(c) (void) (c);
#define multiline_comment(c) (*((c) + 1) == 'b' && *((c) + 2) == 'e' && *((c) + 3) == 'g' && *((c) + 4) == 'i' && *((c) + 5) == 'n')
#define multiline_end(c) (*(c) == '=' && *((c) + 1) == 'e' && *((c) + 2) == 'n' && *((c) + 3) == 'd')
#define not_sep(c) (is_valid_identifier(c) || is_utf8_digit(c) || *(c) == '_')
#define is_blank(c) ((c) == ' ' || (c) == '\t')
#define SWAP(a, b, aux) { (aux) = (a); (a) = (b); (b) = (aux); }
/* True for names longer than four characters that start and end with "__". */
#define is_special_method(buffer) ((strlen(buffer) > 4) && (buffer)[0] == '_' && \
                                   (buffer)[1] == '_' && (buffer)[strlen(buffer) - 2] == '_' && \
                                   (buffer)[strlen(buffer) - 1] == '_')
/* True once the read cursor has consumed `length` bytes of the blob. */
#define IS_EOF() ((unsigned int) (parser->lex_p - parser->blob) >= parser->length)
#define IS_ARG() IS_lex_state(EXPR_ARG_ANY)
#define IS_END() IS_lex_state(EXPR_END_ANY)
#define IS_BEG() IS_lex_state(EXPR_BEG_ANY)
#define IS_SPCARG(c) (IS_ARG() && space_seen && !isspace(c))
#define IS_LABEL_POSSIBLE() ((IS_lex_state(EXPR_BEG | EXPR_ENDFN) && !cmd_state) || IS_ARG())
#define IS_LABEL_SUFFIX() (*parser->lex_p == ':' && *(parser->lex_p + 1) != ':')
#define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)
1691
1692
1693 /* Initialize the parser */
1694 static void init_parser(struct parser_t *parser)
1695 {
1696     parser->content_given = 0;
1697     parser->ast = NULL;
1698     parser->blob = NULL;
1699     parser->lex_p = NULL;
1700     parser->lex_prev = NULL;
1701     parser->lex_prevc = 0;
1702     parser->lex_pend = NULL;
1703     parser->line_pend = 0;
1704     parser->column_pend = 0;
1705     parser->here_found = 0;
1706     parser->eof_reached = 0;
1707     parser->cond_stack = 0;
1708     parser->cmdarg_stack = 0;
1709     parser->in_def = 0;
1710     parser->lpar_beg = 0;
1711     parser->paren_nest = 0;
1712     parser->sp = 0;
1713     parser->line = 1;
1714     parser->column = 0;
1715     parser->pos_stack = (struct pos_t *) malloc(SSIZE * sizeof(struct pos_t));
1716     parser->stack_scale = 0;
1717     parser->pos_size = 0;
1718     parser->errors = NULL;
1719     parser->last_error = NULL;
1720     parser->warning = 0;
1721     parser->unrecoverable = 0;
1722     parser->last_comment.comment = NULL;
1723     parser->last_comment.line = 0;
1724     parser->comment_index = 0;
1725     command_start = 1;
1726     lex_strterm = NULL;
1727     lex_state = EXPR_BEG;
1728 }
1729
1730 /* Free the parser */
1731 static void free_parser(struct parser_t *parser)
1732 {
1733     int index;
1734
1735     for (index = 0; index < parser->sp; index++)
1736         free(parser->stack[index]);
1737     if (parser->pos_stack != NULL)
1738         free(parser->pos_stack);
1739     if (lex_strterm && lex_strterm->word)
1740         free(lex_strterm->word);
1741     if (parser->last_comment.comment)
1742       free(parser->last_comment.comment);
1743     if (!parser->content_given)
1744         free(parser->blob);
1745 }
1746
1747 /* Read the file's source code and allocate it for further inspection. */
1748 static int retrieve_source(struct parser_t *p, const char *path)
1749 {
1750     int length;
1751
1752     /* Open specified file */
1753     FILE *fd = fopen(path, "r");
1754     if (!fd) {
1755         fprintf(stderr, "Cannot open file: %s\n", path);
1756         return 0;
1757     }
1758
1759     fseek(fd, 0, SEEK_END);
1760     length = ftell(fd);
1761     fseek(fd, 0, SEEK_SET);
1762
1763     if (!length)
1764         return 0;
1765     p->blob = (char *) malloc(sizeof(char) * length);
1766
1767     if (!p->blob) {
1768         fprintf(stderr, "Cannot store contents\n");
1769         return 0;
1770     }
1771     fread(p->blob, length, 1, fd);
1772     if (ferror(fd)) {
1773         fprintf(stderr, "Reading error\n");
1774         return 0;
1775     }
1776     p->length = length;
1777     p->lex_p = p->blob;
1778     fclose(fd);
1779     return 1;
1780 }
1781
/*
 * Some macros to make UTF-8 support easier.
 *
 * is_utf(c) is true when byte `c` is NOT a UTF-8 continuation byte
 * (10xxxxxx); is_special(s) is true when the character at `s` takes more
 * than one byte; is_identchar(s) is true for alphanumerics and '_'.
 */
#define is_utf(c) (((c) & 0xC0) != 0x80)
#define is_special(c) (utf8_charsize(c) > 1)
#define is_identchar(c) (is_utf8_alnum(c) || *(c) == '_')

/*
 * Return the number of bytes taken by the UTF-8 character that starts at
 * `s`: 1 plus the number of continuation bytes that directly follow the
 * first byte. The result is therefore 1 for plain ASCII and greater than 1
 * otherwise. At the string terminator the result is 1 and no byte past the
 * terminator is inspected.
 */
static int utf8_charsize(const char *s)
{
    int size = 1;

    if (*s == '\0')
        return size;
    while (s[size] && !is_utf(s[size]))
        size++;
    return size;
}

/*
 * The is_utf8_* predicates treat every multi-byte character as alphabetic,
 * alphanumeric and graphic, and never as a digit. Single bytes are handed
 * to <ctype.h> as unsigned char, since passing a negative char there is
 * undefined behaviour.
 */
static int is_utf8_alpha(const char *str)
{
    return is_special(str) ? 1 : isalpha((unsigned char) *str);
}

static int is_utf8_alnum(const char *str)
{
    return is_special(str) ? 1 : isalnum((unsigned char) *str);
}

static int is_utf8_graph(const char *str)
{
    return is_special(str) ? 1 : isgraph((unsigned char) *str);
}

static int is_utf8_digit(const char *str)
{
    return is_special(str) ? 0 : isdigit((unsigned char) *str);
}

/*
 * Check that the given parameter points to the start of a valid
 * identifier. Returns 1 for:
 *   - an alphabetic character;
 *   - '$' followed by a graphic character that is not a digit;
 *   - '_' or '@' followed by an alphabetic character;
 *   - "@@" followed by an alphabetic character or '_'.
 * Returns 0 otherwise.
 */
static int is_valid_identifier(const char *c)
{
    if (is_utf8_alpha(c))
        return 1;
    if (*c == '$')
        return is_utf8_graph(c + 1) && !is_utf8_digit(c + 1);
    if (*c == '_')
        return is_utf8_alpha(c + 1) ? 1 : 0;
    if (*c == '@') {
        if (is_utf8_alpha(c + 1))
            return 1;
        if (*(c + 1) == '@' && (is_utf8_alpha(c + 2) || *(c + 2) == '_'))
            return 1;
    }
    return 0;
}
1841
1842 /* Get the next character and move the lexer forward. */
1843 static int parser_nextc(struct parser_t *parser)
1844 {
1845     int c;
1846
1847     if (parser->eof_reached || IS_EOF())
1848         return -1;
1849
1850     parser->lex_prev = parser->lex_p;
1851     parser->lex_prevc = parser->column;
1852     c = (unsigned char) *parser->lex_p++;
1853     if (c == '\n') {
1854         if (parser->here_found) {
1855             parser->line = parser->line_pend;
1856             parser->column = parser->column_pend;
1857             parser->lex_p = parser->lex_pend + 1;
1858             parser->here_found = 0;
1859         }
1860         parser->line++;
1861         parser->column = -1;
1862     }
1863     parser->column++;
1864     return c;
1865 }
1866 #define nextc() parser_nextc(parser)
1867
1868 /* Move the lexer backwards. */
1869 static void parser_pushback(struct parser_t *parser)
1870 {
1871     parser->column--;
1872     parser->lex_p--;
1873     if (*parser->lex_p == '\n') {
1874         parser->line--;
1875         parser->column = parser->lex_prevc;
1876     }
1877 }
1878 #define pushback() parser_pushback(parser)
1879
1880 /* It parses a heredoc identifier and sets a new lex_strterm */
1881 static int parse_heredoc_identifier(struct parser_t *parser)
1882 {
1883     char *buffer, *ptr;
1884     int count = SSIZE, scale = 0;
1885     char c = nextc();
1886     unsigned char quote_seen = 0, term = ' ';
1887     unsigned char dash_seen = 0;
1888
1889     /* Check for <<- case */
1890     if (c == '-') {
1891         dash_seen = 1;
1892         c = nextc();
1893     }
1894     /* And now surrounding quotes */
1895     if (c == '\'' || c == '"' || c == '`') {
1896         term = c;
1897         c = nextc();
1898         quote_seen = 1;
1899     }
1900     if (!quote_seen && !is_identchar(parser->lex_prev)) {
1901         if (dash_seen)
1902             pushback();
1903         return 0;
1904     }
1905
1906     buffer = (char *) malloc(SSIZE * sizeof(char));
1907     ptr = buffer;
1908     for (;;) {
1909         /* If quote was seen, anything except the term is accepted */
1910         if (quote_seen) {
1911             if (c == term || !is_utf8_graph(parser->lex_prev))
1912                 break;
1913         } else if (!is_identchar(parser->lex_prev))
1914             break;
1915         if (!count) {
1916             scale++;
1917             buffer = (char *) realloc(buffer, (SSIZE << scale) * sizeof(char));
1918         }
1919         *ptr++ = c;
1920         c = nextc();
1921         if (c < 0) {
1922             free(buffer);
1923             yyerror(parser, "unterminated here document identifier");
1924             return 0;
1925         }
1926     }
1927     *ptr = '\0';
1928     pushback();
1929
1930     lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
1931     lex_strterm->term = 1;
1932     lex_strterm->can_embed = dash_seen;
1933     lex_strterm->word = buffer;
1934     lex_strterm->token = token_heredoc;
1935     lex_strterm->nestable = 0;
1936     lex_strterm->paren = 0;
1937     parser->lex_pend = parser->lex_p + quote_seen;
1938     parser->line_pend = parser->line;
1939     parser->column_pend = parser->column;
1940     return 1;
1941 }
1942
1943 /* Let's parse a heredoc */
1944 static int parse_heredoc(struct parser_t *parser)
1945 {
1946     int length = strlen(lex_strterm->word);
1947     char *aux = (char*)alloca(length);
1948     char c = nextc();
1949     int i = 0;
1950     int ax = 0;
1951
1952     /* Skip until next line */
1953     while (c != -1 && c != '\n')
1954         c = nextc();
1955
1956     do {
1957         c = nextc();
1958
1959         /* Ignore initial spaces if dash seen */
1960         if (i == 0 && lex_strterm->can_embed)
1961             while (isspace(c) || c == '\n')
1962                 c = nextc();
1963         if (c == '#' && *(parser->lex_prev - 1) != '\\') {
1964             c = nextc();
1965             switch (c) {
1966                 case '$': case '@':
1967                     parser->column -= ax;
1968                     pushback();
1969                     return tSTRING_DVAR;
1970                 case '{':
1971                     parser->column -= ax;
1972                     command_start = 1;
1973                     return tSTRING_DBEG;
1974             }
1975         }
1976         aux[i] = c;
1977         if (c == '\n') {
1978             if ((length == i) && !strncmp(lex_strterm->word, aux, i)) {
1979                 pushback();
1980                 return tSTRING_END;
1981             }
1982             i = -1;
1983         } else
1984             ax += utf8_charsize(parser->lex_prev) - 1;
1985         if (i >= length)
1986             i = -1;
1987         i++;
1988     } while (c != -1);
1989
1990     parser->eof_reached = 1;
1991     if (lex_strterm->word) {
1992         free(lex_strterm->word);
1993         lex_strterm->word = NULL;
1994     }
1995     free(lex_strterm);
1996     lex_strterm = NULL;
1997     return token_invalid;
1998 }
1999
/*
 * Return the character that closes c: the matching bracket for '[', '(',
 * '<' and '{', and c itself for anything else.
 */
static char closing_char(char c)
{
    if (c == '[')
        return ']';
    if (c == '(')
        return ')';
    if (c == '<')
        return '>';
    if (c == '{')
        return '}';
    return c;
}
2011
2012 /* Guess the token kind of the shortcut based on the given character */
2013 static int guess_kind(struct parser_t *parser, char c)
2014 {
2015     if (!isalpha(c))
2016         return token_string;
2017
2018     switch (c) {
2019         case 'Q': case 'q': case 'x': return token_string;
2020         case 'I': case 'i':
2021             if (parser->version < ruby20) {
2022                 yywarning("This shortcut is only available in Ruby 2.0.x or higher.");
2023             }
2024         case 'W': case 'w': return token_array;
2025         case 's': return token_symbol;
2026         case 'r': return token_regexp;
2027         default:
2028             yyerror(parser, "unknown type of %string");
2029             return 0;
2030     }
2031 }
2032
2033 /* Push name to the stack */
2034 static void push_stack(struct parser_t *parser, const char *buf)
2035 {
2036     parser->stack[parser->sp] = strdup(buf);
2037     parser->sp++;
2038 }
2039
2040 /* Pop name from the stack. */
2041 static void pop_stack(struct parser_t *parser, struct Node *n)
2042 {
2043     if (n != NULL)
2044         n->name = parser->stack[0];
2045     parser->stack[0] = parser->stack[1];
2046     parser->stack[1] = NULL;
2047     parser->sp--;
2048 }
2049
2050 /* Push a position into the stack of positions */
2051 static void push_pos(struct parser_t *parser, struct pos_t tokp)
2052 {
2053     int scale = SSIZE * parser->stack_scale;
2054
2055     parser->pos_size++;
2056     if (parser->pos_size > SSIZE) {
2057         parser->pos_size = 1;
2058         parser->stack_scale++;
2059         scale += SSIZE;
2060         parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t));
2061     }
2062     parser->pos_stack[parser->pos_size + scale - 1] = tokp;
2063 }
2064
2065 /* Pop a position from the stack of positions and assign to the given node */
2066 static void pop_pos(struct parser_t *parser, struct Node *n)
2067 {
2068     int scale = SSIZE * parser->stack_scale;
2069     int pos = parser->pos_size - 1 + scale;
2070     struct pos_t tokp = parser->pos_stack[pos];
2071
2072     if (n != NULL) {
2073         n->pos.start_line = tokp.start_line;
2074         n->pos.start_col = tokp.start_col;
2075         n->pos.end_line = tokp.end_line;
2076         n->pos.end_col = tokp.end_col;
2077         n->pos.offset = tokp.offset;
2078     }
2079     parser->pos_size--;
2080     if (parser->pos_size == 0 && parser->stack_scale > 0) {
2081         parser->stack_scale--;
2082         parser->pos_size = SSIZE;
2083         scale -= SSIZE;
2084         parser->pos_stack = (struct pos_t *) realloc(parser->pos_stack, scale * sizeof(struct pos_t));
2085     }
2086 }
2087
2088 /* Like pop_pos but it just copies the start position to the given node */
2089 static void pop_start(struct parser_t *parser, struct Node *n)
2090 {
2091     n->pos.start_line = parser->pos_stack[parser->pos_size - 1].start_line;
2092     n->pos.start_col = parser->pos_stack[parser->pos_size - 1].start_col;
2093     pop_pos(parser, NULL);
2094 }
2095
2096 /* Like pop_pos but it just copies the end position to the given node */
2097 static void pop_end(struct parser_t *parser, struct Node *n)
2098 {
2099     n->pos.end_line = parser->pos_stack[parser->pos_size - 1].start_line;
2100     n->pos.end_col = parser->pos_stack[parser->pos_size - 1].start_col;
2101     pop_pos(parser, NULL);
2102 }
2103
2104 /* Push the last comment that we've found to the stack of comments. */
2105 static void push_last_comment(struct parser_t *parser)
2106 {
2107     if ((parser->line - parser->last_comment.line) < 2)
2108         parser->comment_stack[parser->comment_index] = parser->last_comment.comment;
2109     else {
2110         parser->comment_stack[parser->comment_index] = NULL;
2111         if (parser->last_comment.comment)
2112             free(parser->last_comment.comment);
2113     }
2114     parser->comment_index++;
2115     parser->last_comment.comment = NULL;
2116 }
2117
2118 /* Pop a comment from the stack of comments and assign it to the given node */
2119 static void pop_comment(struct parser_t *parser, struct Node *n)
2120 {
2121     if (parser->comment_index > 0) {
2122         parser->comment_index--;
2123         n->comment = parser->comment_stack[parser->comment_index];
2124     }
2125 }
2126
/*
 * Buffer guard for code that accumulates text. It expects three locals at
 * the expansion site: `count`, `scale` and `buffer`. Once `count` is greater
 * than N, `scale` is incremented, `buffer` is realloc'ed to scale * 1024
 * bytes and `count` restarts at 0.
 *
 * NOTE(review): since `count` is reset, code that indexes `buffer` with
 * `count` starts writing at offset 0 again after this fires.
 */
#define __check_buffer_size(N) { \
  if (count > N) { \
    scale++; \
    count = 0; \
    buffer = (char *) realloc(buffer, scale * 1024); \
  } \
}
2134
2135 /* Store the given comment as the last comment seen */
2136 static void store_comment(struct parser_t *parser, char *comment)
2137 {
2138     if (parser->last_comment.comment != NULL)
2139         free(parser->last_comment.comment);
2140     parser->last_comment.comment = comment;
2141     parser->last_comment.line = parser->line;
2142 }
2143
2144 /* Check if the given parameter points to an indented comment */
2145 static int is_indented_comment(struct parser_t *parser)
2146 {
2147     char *c = parser->lex_prev;
2148     char *original = c;
2149
2150     for (; *c == ' ' || *c == '\t'; ++c);
2151     parser->lex_p += (c - original);
2152     parser->column += (c - original);
2153     return (*c == '#');
2154 }
2155
2156 /* Read a comment and store it if possible */
2157 static void set_comment(struct parser_t *parser)
2158 {
2159     int c = ' ', count = 0, scale = 0;
2160     char *buffer = (char *) malloc(LSIZE);
2161
2162     for (;; ++count) {
2163         if (c != '#' && !is_indented_comment(parser))
2164             break;
2165         c = *(parser->lex_p - 1);
2166         while (c == '#' && c != -1)
2167             c = nextc();
2168         if (c != '\n') {
2169             for (; c != -1; count++) {
2170                 __check_buffer_size(1000);
2171                 buffer[count] = c;
2172                 c = nextc();
2173                 if (c == '\n') {
2174                     buffer[++count] = c;
2175                     break;
2176                 }
2177             }
2178         } else
2179             buffer[count] = c;
2180         c = nextc();
2181     }
2182
2183     if (c != -1)
2184         pushback();
2185     buffer[count] = '\0';
2186     store_comment(parser, buffer);
2187 }
2188
2189 /* Parse a string or a regexp */
2190 static int parse_string(struct parser_t *parser)
2191 {
2192     register int c = *parser->lex_p;
2193     int next = *(parser->lex_p + 1);
2194
2195     if (c == '\\' && (next == '\\' || next == lex_strterm->term || next == lex_strterm->paren)) {
2196         parser->lex_p += 2;
2197         parser->column += 2;
2198         return tSTRING_CONTENT;
2199     }
2200
2201     if (c == lex_strterm->term) {
2202         nextc();
2203         if (lex_strterm->nestable) {
2204             lex_strterm->nest--;
2205             if (lex_strterm->nest > 0)
2206                 return tSTRING_CONTENT;
2207         }
2208         return tSTRING_END;
2209     } else if (lex_strterm->nestable && lex_strterm->paren == c) {
2210         lex_strterm->nest++;
2211         nextc();
2212         return tSTRING_CONTENT;
2213     }
2214
2215     if (IS_EOF()) {
2216         parser->eof_reached = 1;
2217         yyerror(parser, "unterminated string meets end of file");
2218         free(lex_strterm);
2219         lex_strterm = NULL;
2220         return token_invalid;
2221     }
2222
2223     if (lex_strterm->can_embed && c == '#' && *(parser->lex_prev) != '\\') {
2224         nextc();
2225         switch (*parser->lex_p) {
2226             case '$': case '@':
2227                 return tSTRING_DVAR;
2228             case '{':
2229                 c = nextc();
2230                 command_start = 1;
2231                 return tSTRING_DBEG;
2232         }
2233         pushback();
2234     }
2235
2236     /* Re-using the next and the c variables */
2237     next = utf8_charsize(parser->lex_p);
2238     c = next - 1;
2239     while (next-- > 0) {
2240         if (nextc() < 0) {
2241             parser->eof_reached = 1;
2242             free(lex_strterm);
2243             lex_strterm = NULL;
2244             return token_invalid;
2245         }
2246     }
2247     parser->column -= c;
2248     return tSTRING_CONTENT;
2249 }
2250
2251 /* Regular expressions can end with some options, read them */
2252 static void parse_re_options(struct parser_t *parser)
2253 {
2254     char aux[64];
2255     int c = *parser->lex_p;
2256
2257     while (isalpha(c)) {
2258         if (c != 'i' && c != 'm' && c != 'x' && c != 'o' &&
2259             c != 'u' && c != 'e' && c != 's' && c != 'n') {
2260             sprintf(aux, "unknown regexp option - %c", c);
2261             yyerror(parser, aux);
2262             return;
2263         }
2264         c = nextc();
2265     }
2266     pushback();
2267 }
2268
/*
 * Standard warning for ambiguous arguments: it emits a fixed message through
 * yywarning. The arg_ambiguous() wrapper below calls it and always evaluates
 * to 1, so it can be used as an operand of && inside a condition.
 */
static void arg_ambiguous_gen(struct parser_t *parser)
{
    yywarning("ambiguous first argument; put parentheses or even spaces");
}
#define arg_ambiguous() (arg_ambiguous_gen(parser), 1)
2275
2276 /*
2277  * This is the lexer. It reads the source code (blob) and provides tokens to
2278  * the parser. It also updates the necessary flags.
2279  */
2280 static int parser_yylex(struct parser_t *parser)
2281 {
2282     register int c;
2283     int bc = 0;
2284     char *cp;
2285     char lexbuf[SSIZE];
2286     unsigned char space_seen = 0;
2287     int cmd_state;
2288     struct pos_t tokp = {-1, -1, -1, -1, 0};
2289
2290     /* Check for string terminations: string, regexp, heredoc, shortcut */
2291     if (lex_strterm) {
2292         if (lex_strterm->token == token_heredoc) {
2293             c = parse_heredoc(parser);
2294             if (c == tSTRING_END) {
2295                 tokp.end_line = parser->line;
2296                 tokp.end_col = parser->column;
2297                 SWAP(parser->line, parser->line_pend, bc);
2298                 SWAP(parser->column, parser->column_pend, bc);
2299                 SWAP(parser->lex_p, parser->lex_pend, cp);
2300                 parser->here_found = 1;
2301                 lex_state = EXPR_END;
2302
2303             }
2304         } else {
2305             c = parse_string(parser);
2306             if (c == tSTRING_END) {
2307                 if (lex_strterm->token == token_regexp && isalpha(*parser->lex_p))
2308                     parse_re_options(parser);
2309                 lex_state = EXPR_END;
2310             }
2311         }
2312         return c;
2313     }
2314
2315     cmd_state = command_start;
2316     command_start = 0;
2317 retry:
2318     c = nextc();
2319
2320     tokp.start_line = parser->line;
2321     tokp.start_col = parser->column - 1;
2322
2323     /* Check numeric values here instead of entering the main switch */
2324     if (isdigit(c)) {
2325         cp = lexbuf;
2326         goto tnum;
2327     }
2328
2329     switch (c) {
2330         case '\0':      /* NULL */
2331         case EOF:       /* end of script */
2332             parser->eof_reached = 1;
2333             return token_invalid;
2334
2335         /* white spaces */
2336         case ' ': case '\t': case '\f': case '\r':
2337         case '\13': /* vertical tab */
2338             space_seen = 1;
2339             goto retry;
2340         case '#':
2341             set_comment(parser);
2342         case '\n':
2343             if (IS_lex_state(EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT))
2344                 goto retry;
2345             CMDARG_PUSH(0);
2346             lex_state = EXPR_BEG;
2347             command_start = 1;
2348             return '\n';
2349         case '=':
2350             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2351             bc = nextc();
2352             if (bc == '=') {
2353                 if (nextc() == '=')
2354                     return tEQQ;
2355                 pushback();
2356                 return tEQ;
2357             }
2358             if (bc == '~')
2359                 return tMATCH;
2360             if (bc == '>')
2361                 return tASSOC;
2362             if (multiline_comment(parser->lex_prev - 1)) {
2363                 parser->column += 4;
2364                 parser->lex_p += 4;
2365                 while (!multiline_end(parser->lex_prev))
2366                     nextc();
2367                 parser->column += 3;
2368                 parser->lex_p += 3;
2369                 bc = 0;
2370                 goto retry;
2371             }
2372             break;
2373         case '[':
2374             parser->paren_nest++;
2375             if (IS_AFTER_OPERATOR()) {
2376                 lex_state = EXPR_ARG;
2377                 bc = nextc();
2378                 if (bc == ']') {
2379                     if (nextc() == '=')
2380                         return tASET;
2381                     c = tAREF;
2382                 }
2383                 break;
2384             } else if (IS_BEG())
2385                 c = tLBRACKET;
2386             else if (IS_ARG() && space_seen)
2387                 c = tLBRACKET;
2388             lex_state = EXPR_BEG;
2389             COND_PUSH(0);
2390             CMDARG_PUSH(0);
2391             return c;
2392         case ']':
2393             parser->paren_nest--;
2394             lex_state = EXPR_ENDARG;
2395             CMDARG_LEXPOP();
2396             COND_LEXPOP();
2397             return c;
2398         case '<':
2399             bc = nextc();
2400             if (bc == '<' && !IS_lex_state(EXPR_DOT | EXPR_CLASS) &&
2401                 !IS_END() && (!IS_ARG() || space_seen)) {
2402                 if (parse_heredoc_identifier(parser))
2403                     return tSTRING_BEG;
2404                 pushback();
2405             }
2406             if (IS_AFTER_OPERATOR())
2407                 lex_state = EXPR_ARG;
2408             else {
2409                 if (IS_lex_state(EXPR_CLASS))
2410                     command_start = 1;
2411                 lex_state = EXPR_BEG;
2412             }
2413             if (bc == '=') {
2414                 if (nextc() == '>')
2415                     return tCMP;
2416                 pushback();
2417                 return tLEQ;
2418             }
2419             if (bc == '<') {
2420                 if (nextc() == '=') {
2421                     lex_state = EXPR_BEG;
2422                     return tOP_ASGN;
2423                 }
2424                 c = tLSHIFT;
2425             }
2426             break;
2427         case '>':
2428             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2429             bc = nextc();
2430             if (bc == '=')
2431                 return tGEQ;
2432             if (bc == '>') {
2433                 if (nextc() == '=') {
2434                     lex_state = EXPR_BEG;
2435                     return tOP_ASGN;
2436                 }
2437                 c = tRSHIFT;
2438             }
2439             break;
2440         case '!':
2441             bc = nextc();
2442             if (IS_AFTER_OPERATOR()) {
2443                 lex_state = EXPR_ARG;
2444                 if (bc == '@')
2445                     return '!';
2446             } else
2447                 lex_state = EXPR_BEG;
2448             if (bc == '=')
2449                 return tNEQ;
2450             if (bc == '~')
2451                 return tNMATCH;
2452             break;
2453         case '+':
2454             bc = nextc();
2455             if (IS_AFTER_OPERATOR()) {
2456                 lex_state = EXPR_ARG;
2457                 if (bc == '@')
2458                     return tUPLUS;
2459                 pushback();
2460                 return '+';
2461             }
2462             if (bc == '=') {
2463                 lex_state = EXPR_BEG;
2464                 return tOP_ASGN;
2465             }
2466             if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) {
2467                 lex_state = EXPR_BEG;
2468                 pushback();
2469                 return tUPLUS;
2470             }
2471             lex_state = EXPR_BEG;
2472             break;
2473         case '-':
2474             bc = nextc();
2475             if (IS_AFTER_OPERATOR()) {
2476                 lex_state = EXPR_ARG;
2477                 if (bc == '@')
2478                     return tUMINUS;
2479                 pushback();
2480                 return '-';
2481             }
2482             if (bc == '=') {
2483                 lex_state = EXPR_BEG;
2484                 return tOP_ASGN;
2485             }
2486             if (bc == '>') {
2487                 lex_state = EXPR_ENDFN;
2488                 return tLAMBDA;
2489             }
2490             if (IS_BEG() || (IS_SPCARG(bc) && arg_ambiguous())) {
2491                 lex_state = EXPR_BEG;
2492                 pushback();
2493                 if (bc != -1 && isdigit(bc))
2494                     return tUMINUS_NUM;
2495                 return tUMINUS;
2496             }
2497             lex_state = EXPR_BEG;
2498             break;
2499         case '*':
2500             bc = nextc();
2501             if (bc == '=') {
2502                 lex_state = EXPR_BEG;
2503                 return tOP_ASGN;
2504             }
2505             if (bc == '*') {
2506                 bc = nextc();
2507                 if (bc == '=') {
2508                     lex_state = EXPR_BEG;
2509                     return tOP_ASGN;
2510                 }
2511                 pushback();
2512                 if (IS_SPCARG(bc)) {
2513                     yywarning("`**' interpreted as argument prefix");
2514                     c = tDSTAR;
2515                 } else if (IS_BEG())
2516                     c = tDSTAR;
2517                 else
2518                     c = tPOW;
2519                 lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2520                 return c;
2521             }
2522             if (IS_SPCARG(bc)) {
2523                 yywarning("`*' interpreted as argument prefix");
2524                 c = tSTAR;
2525             } else if (IS_BEG())
2526                 c = tSTAR;
2527             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2528             break;
2529         case '/':
2530             if (IS_lex_state(EXPR_BEG_ANY)) {
2531             regexp:
2532                 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2533                 lex_strterm->term = c;
2534                 lex_strterm->can_embed = 1;
2535                 lex_strterm->token = token_regexp;
2536                 lex_strterm->word = NULL;
2537                 lex_strterm->nestable = 0;
2538                 lex_strterm->paren = 0;
2539                 return tSTRING_BEG;
2540             }
2541             bc = nextc();
2542             if (bc == '=') {
2543                 lex_state = EXPR_BEG;
2544                 return tOP_ASGN;
2545             }
2546             pushback();
2547             if (IS_SPCARG(bc)) {
2548                 arg_ambiguous_gen(parser);
2549                 goto regexp;
2550             }
2551             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2552             return c;
2553         case '%':
2554             bc = nextc();
2555             if (IS_lex_state(EXPR_BEG_ANY) || IS_SPCARG(bc)) {
2556                 lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2557                 lex_strterm->token = guess_kind(parser, bc);
2558                 if (isalpha(bc))
2559                     bc = nextc();
2560                 lex_strterm->term = closing_char(bc);
2561                 lex_strterm->can_embed = 1;
2562                 lex_strterm->word = NULL;
2563                 lex_strterm->paren = bc;
2564                 lex_strterm->nestable = (bc != lex_strterm->term);
2565                 lex_strterm->nest = 1;
2566                 return tSTRING_BEG;
2567             }
2568             if (bc == '=') {
2569                 lex_state = EXPR_BEG;
2570                 return tOP_ASGN;
2571             }
2572             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2573             break;
2574         case '&':
2575             bc = nextc();
2576             if (bc == '&') {
2577                 lex_state = EXPR_BEG;
2578                 if (nextc() == '=')
2579                     return tOP_ASGN;
2580                 pushback();
2581                 return tAND;
2582             }
2583             if (bc == '=') {
2584                 lex_state = EXPR_BEG;
2585                 return tOP_ASGN;
2586             }
2587             if (IS_SPCARG(bc)) {
2588                 yywarning("`&' interpreted as argument prefix");
2589                 c = tAMPER;
2590             } else if (IS_BEG())
2591                 c = tAMPER;
2592             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2593             break;
2594         case '|':
2595             bc = nextc();
2596             if (bc == '|') {
2597                 lex_state = EXPR_BEG;
2598                 if (nextc() == '=')
2599                     return tOP_ASGN;
2600                 pushback();
2601                 return tOR;
2602             }
2603             if (bc == '=') {
2604                 lex_state = EXPR_BEG;
2605                 return tOP_ASGN;
2606             }
2607             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2608             break;
2609         case '.':
2610             lex_state = EXPR_BEG;
2611             bc = nextc();
2612             if (bc == '.') {
2613                 if (nextc() == '.')
2614                     return tDOT3;
2615                 pushback();
2616                 return tDOT2;
2617             }
2618             lex_state = EXPR_DOT;
2619             break;
2620         case ':':
2621             bc = nextc();
2622             if (bc == ':') {
2623                 if (IS_BEG() || IS_lex_state(EXPR_CLASS) || IS_SPCARG(-1)) {
2624                     lex_state = EXPR_BEG;
2625                     return tCOLON3;
2626                 }
2627                 lex_state = EXPR_DOT;
2628                 return tCOLON2;
2629             }
2630             if (IS_END() || isspace(bc)) {
2631                 lex_state = EXPR_BEG;
2632                 break;
2633             }
2634             lex_state = EXPR_FNAME;
2635             pushback();
2636             return tSYMBEG;
2637         case '^':
2638             if (nextc() == '=') {
2639                 lex_state = EXPR_BEG;
2640                 return tOP_ASGN;
2641             }
2642             lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
2643             break;
2644         case ';':
2645             command_start = 1;
2646             /* fallthrough */
2647         case ',':
2648             lex_state = EXPR_BEG;
2649             return c;
2650         case '?':
2651             if (IS_END()) {
2652                 lex_state = EXPR_VALUE;
2653                 return c;
2654             }
2655             bc = nextc();
2656             if (isspace(bc)) {
2657                 lex_state = EXPR_VALUE;
2658                 break;
2659             }
2660             if (bc == '\\')
2661                 nextc();
2662             if (IS_BEG()) {
2663                 lex_state = EXPR_END;
2664                 return tCHAR;
2665             }
2666             if (is_blank(*parser->lex_p) || *parser->lex_p == ':') {
2667                 lex_state = EXPR_VALUE;
2668                 break;
2669             }
2670             lex_state = EXPR_END;
2671             return tCHAR;
2672         case '`':
2673             if (IS_lex_state(EXPR_FNAME)) {
2674                 lex_state = EXPR_ENDFN;
2675                 return c;
2676             }
2677             if (IS_lex_state(EXPR_DOT)) {
2678                 lex_state = (cmd_state) ? EXPR_CMDARG : EXPR_ARG;
2679                 return c;
2680             }
2681             /* fallthrough */
2682         case '"':
2683             space_seen = 1;
2684             /* fallthrough */
2685         case '\'':
2686             lex_strterm = (struct term_t *) malloc(sizeof(struct term_t));
2687             lex_strterm->term = c;
2688             lex_strterm->can_embed = space_seen;
2689             lex_strterm->token = token_string;
2690             lex_strterm->word = NULL;
2691             lex_strterm->nestable = 0;
2692             lex_strterm->paren = 0;
2693             return tSTRING_BEG;
2694         case '\\':
2695             if (nextc() == '\n') {
2696                 space_seen = 1;
2697                 goto retry;
2698             }
2699             pushback();
2700             return c;
2701         case '(':
2702             if (IS_BEG())
2703                 c = tLPAREN;
2704             else if (IS_SPCARG(-1))
2705                 c = tLPAREN_ARG;
2706             parser->paren_nest++;
2707             lex_state = EXPR_BEG;
2708             COND_PUSH(0);
2709             CMDARG_PUSH(0);
2710             return c;
2711         case ')':
2712             parser->paren_nest--;
2713             lex_state = EXPR_ENDFN;
2714             CMDARG_LEXPOP();
2715             COND_LEXPOP();
2716             return c;
2717         case '{':
2718             if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) {
2719                 lex_state = EXPR_BEG;
2720                 parser->lpar_beg = 0;
2721                 parser->paren_nest--;
2722                 COND_PUSH(0);
2723                 CMDARG_PUSH(0);
2724                 push_pos(parser, tokp);
2725                 if (parser->version < ruby19) {
2726                     yywarning("\"->\" syntax is only available in Ruby 1.9.x or higher.");
2727                 }
2728                 return tLAMBEG; /* this is a lambda ->() {} construction */
2729             }
2730             if (IS_ARG() || IS_lex_state(EXPR_END | EXPR_ENDFN))
2731                 c = '{';
2732             else if (IS_lex_state(EXPR_ENDARG))
2733                 c = tLBRACE_ARG; /* block (expr) */
2734             else
2735                 c = tLBRACE; /* smells like a hash */
2736             COND_PUSH(0);
2737             CMDARG_PUSH(0);
2738             lex_state = EXPR_BEG;
2739             if (c != tLBRACE) {
2740                 push_pos(parser, tokp);
2741                 command_start = 1;
2742             }
2743             return c; /* block (primary) */
2744         case '}':
2745             CMDARG_LEXPOP();
2746             COND_LEXPOP();
2747             tokp.end_line = parser->line;
2748             tokp.end_col = parser->column;
2749             push_pos(parser, tokp);
2750             lex_state = EXPR_ENDARG;
2751             return c;
2752         case '@':
2753             cp = lexbuf;
2754             *cp++ = c;
2755             c = nextc();
2756             if (c != '@') {
2757                 bc = IVAR;
2758             } else {
2759                 *cp++ = c;
2760                 c = nextc();
2761                 bc = CVAR;
2762             }
2763             goto talpha;
2764         case '$':
2765             tokp.end_line = parser->line;
2766             cp = lexbuf;
2767             *cp++ = c;
2768             bc = nextc();
2769             switch (bc) {
2770                 case '1': case '2': case '3': case '4':
2771                 case '5': case '6': case '7': case '8': case '9':
2772                     c = bc;
2773                     while (isdigit(c)) {
2774                         *cp++ = c;
2775                         c = nextc();
2776                     }
2777                     *cp = '\0';
2778                     pushback();
2779                     c = tNTH_REF;
2780                     break;
2781                 case '~': case '*': case '$': case '?': case '!': case '@':
2782                 case '/': case '\\': case ';': case ',': case '.': case '=':
2783                 case ':': case '<': case '>': case '\"':
2784                 case '&': case '`': case '\'': case '+':
2785                 case '0':
2786                     c = GLOBAL;
2787                     *cp++ = bc;
2788                     *cp = '\0';
2789                     break;
2790                 case '-':
2791                     c = nextc();
2792                     *cp++ = bc;
2793                     bc = GLOBAL;
2794                     goto talpha;
2795                 default:
2796                     c = bc;
2797                     bc = GLOBAL;
2798                     goto talpha;
2799             }
2800             lex_state = EXPR_END;
2801             tokp.end_col = parser->column;
2802             push_pos(parser, tokp);
2803             push_stack(parser, lexbuf);
2804             return c;
2805         case '~':
2806             if (IS_AFTER_OPERATOR()) {
2807                 bc = nextc();
2808                 if (bc != '@')
2809                     pushback();
2810                 lex_state = EXPR_ARG;
2811             } else
2812                 lex_state = EXPR_BEG;
2813             return c;
2814         default:
2815             cp = lexbuf;
2816             goto talpha;
2817     }
2818     pushback();
2819     return c;
2820
2821 talpha:
2822     {
2823         int step = 0;
2824         int ax = 0;
2825
2826         /* It's time to parse the word */
2827         while (not_sep(parser->lex_prev)) {
2828             step = utf8_charsize(parser->lex_prev);
2829             ax += step - 1;
2830             while (step-- > 0) {
2831                 *cp++ = c;
2832                 c = nextc();
2833             }
2834             if (c < 0) {
2835                 parser->eof_reached = 1;
2836                 break;
2837             }
2838         }
2839         *cp = '\0';
2840         parser->column -= ax;
2841         tokp.end_line = tokp.start_line;
2842         tokp.end_col = parser->lex_prevc - ax;
2843         pushback();
2844
2845         /* IVAR, CVAR, GLOBAL */
2846         if (bc > 0) {
2847             push_pos(parser, tokp);
2848             push_stack(parser, lexbuf);
2849             lex_state = EXPR_END;
2850             return bc;
2851         }
2852
2853         /* Check for '!', '?' and '=' at the end of the word */
2854         if (c == '!' || c == '?') {
2855             *cp++ = c;
2856             *cp = '\0';
2857             tokp.end_col++;
2858             nextc();
2859             c = BASE;
2860         } else {
2861             c = 0;
2862             if (IS_lex_state(EXPR_FNAME)) {
2863                 bc = nextc();
2864                 if (bc == '=') {
2865                     bc = nextc();
2866                     if (bc != '>') {
2867                         *cp++ = '=';
2868                         *cp = '\0';
2869                         tokp.end_col++;
2870                         c = BASE;
2871                     } else
2872                         pushback();
2873                 }
2874                 pushback();
2875             }
2876             c = (!c && isupper(lexbuf[0])) ? CONST : BASE;
2877         }
2878
2879         /* Check if this is just a hash key. */
2880         if (IS_LABEL_POSSIBLE() && IS_LABEL_SUFFIX()) {
2881             lex_state = EXPR_BEG;
2882             nextc();
2883             push_stack(parser, lexbuf);
2884             push_pos(parser, tokp);
2885             return tKEY;
2886         }
2887
2888         /* Check if this is a keyword */
2889         const struct kwtable *kw = NULL;
2890         if (!IS_lex_state(EXPR_DOT)) {
2891             kw = rb_reserved_word(lexbuf, cp - lexbuf);
2892             if (kw) {
2893                 enum lex_state_e state = lex_state;
2894                 lex_state = kw->state;
2895                 if (state == EXPR_FNAME)
2896                     return kw->id[0];
2897                 if (lex_state == EXPR_BEG)
2898                     command_start = 1;
2899                 switch (kw->id[0]) {
2900                     case tCLASS: case tMODULE: case tDEF:
2901                         push_last_comment(parser);
2902                         break;
2903                     case tDO:
2904                         if (parser->lpar_beg && parser->lpar_beg == parser->paren_nest) {
2905                             parser->lpar_beg = 0;
2906                             parser->paren_nest--;
2907                             return tDO_LAMBDA;
2908                         }
2909                         if (COND_P())
2910                             return tDO_COND;
2911                         push_pos(parser, tokp);
2912                         if (CMDARG_P() && state != EXPR_CMDARG)
2913                             return tDO_BLOCK;
2914                         return tDO;
2915                     case tEND:
2916                         push_pos(parser, tokp);
2917                         break;
2918                 }
2919                 if (IS_lex_state_for(state, EXPR_BEG | EXPR_VALUE))
2920                     return kw->id[0];
2921                 else {
2922                     if (kw->id[0] != kw->id[1])
2923                         lex_state = EXPR_BEG;
2924                     return kw->id[1];
2925                 }
2926             }
2927         }
2928
2929         /* Maybe this is just some special method */
2930         if (is_special_method(lexbuf)) {
2931             if (!strcmp(lexbuf, "__END__")) {
2932                 parser->eof_reached = 1;
2933                 return tpEND;
2934             }
2935         }
2936
2937         /* If this is not a keyword, push its position and the name */
2938         if (!kw) {
2939             push_stack(parser, lexbuf);
2940             push_pos(parser, tokp);
2941         }
2942
2943         /* Update the state of the lexer */
2944         if (IS_lex_state(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT))
2945             lex_state = (cmd_state) ? EXPR_CMDARG : EXPR_ARG;
2946         else if (lex_state == EXPR_FNAME)
2947             lex_state = EXPR_ENDFN;
2948         else
2949             lex_state = EXPR_END;
2950         return c;
2951     }
2952
2953 tnum:
2954     {
2955         char hex, bin, has_point, aux;
2956         hex = bin = has_point = aux = 0;
2957
2958         lex_state = EXPR_END;
2959         if (c == '0') {
2960             bc = nextc();
2961             if (toupper(bc) == 'X') {
2962                 hex = 1;
2963                 c = nextc();
2964             } else if (toupper(bc) == 'B') {
2965                 bin = 1;
2966                 c = nextc();
2967             }
2968             pushback();
2969         }
2970         while (c > 0 && ((isdigit(c) && !bin) || (!hex && !bin && !has_point && c == '.')
2971                     || (hex && toupper(c) >= 'A' && toupper(c) < 'G')
2972                     || (bin && (c == '1' || c == '0')) || c == '_')) {
2973             if (c == '.') {
2974                 if (!isdigit(*parser->lex_p)) {
2975                     tokp.end_line = parser->line;
2976                     tokp.end_col = parser->column - 1;
2977                     pushback();
2978                     return tINTEGER;
2979                 }
2980                 has_point = 1;
2981             }
2982             aux = 1;
2983             c = nextc();
2984         }
2985         if ((bin || hex) && !aux)
2986             yyerror(parser, "numeric literal without digits");
2987
2988         /* is it an exponential number ? */
2989         if (!bin && !hex && toupper(c) == 'E') {
2990             c = nextc();
2991             if (isdigit(c) || ((c == '+' || c == '-') && isdigit(*(parser->lex_p))))
2992                 c = nextc();
2993             while (c != -1 && isdigit(c))
2994                 c = nextc();
2995         }
2996
2997         if (c != -1)
2998             pushback();
2999         tokp.end_line = parser->line + 1;
3000         tokp.end_col = parser->column + 1;
3001         if (c == 'r') {
3002             nextc();
3003             return tRATIONAL;
3004         } else if (c == 'i') {
3005             nextc();
3006             return tIMAGINARY;
3007         }
3008         tokp.end_line--;
3009         tokp.end_col--;
3010         return (has_point) ? tFLOAT : tINTEGER;
3011     }
3012 }
3013
3014 /* Standard yylex. */
3015 #if YYPURE
3016 static int yylex(void *lval, void *p)
3017 #else
3018 static int yylex(void *p)
3019 #endif
3020 {
3021     struct parser_t *parser = (struct parser_t *) p;
3022     int t = token_invalid;
3023     _unused_(lval);
3024
3025     t = parser_yylex(parser);
3026     return t;
3027 }
3028
3029 /*
3030  * Error handling. Take the formmated string s and append the error
3031  * string to the list of errors p->errors.
3032  */
3033 static void yyerror(struct parser_t *parser, const char *s)
3034 {
3035     struct error_t *e = (struct error_t *) malloc(sizeof(struct error_t));
3036
3037     e->msg = strdup(s);
3038     e->line = parser->line;
3039     e->column = parser->column;
3040     e->warning = parser->warning;
3041     e->next = e;
3042     if (parser->errors)
3043         parser->last_error->next = e;
3044     else
3045         parser->errors = e;
3046     parser->last_error = e;
3047     parser->last_error->next = NULL;
3048
3049     parser->eof_reached = !e->warning;
3050     if (!parser->unrecoverable)
3051       parser->unrecoverable = !e->warning;
3052 }
3053
3054 struct ast_t * rb_compile_file(struct options_t *opts)
3055 {
3056     struct parser_t p;
3057     struct ast_t *result;
3058
3059     /* Initialize parser */
3060     init_parser(&p);
3061     p.version = opts->version;
3062     if (!opts->contents) {
3063         if (!retrieve_source(&p, opts->path))
3064             return NULL;
3065     } else {
3066         p.content_given = 1;
3067         p.length = strlen(opts->contents);
3068         p.blob = opts->contents;
3069         p.lex_p = opts->contents;
3070     }
3071
3072     /* Let's parse */
3073     result = (struct ast_t *) malloc(sizeof(struct ast_t));
3074     result->tree = NULL;
3075     result->unrecoverable = 0;
3076     for (;;) {
3077         yyparse(&p);
3078         if (p.ast != NULL) {
3079             if (result->tree == NULL)
3080                 result->tree = p.ast;
3081             else
3082                 update_list(result->tree, p.ast);
3083         }
3084         if (p.eof_reached) {
3085             result->errors = p.errors;
3086             result->unrecoverable = p.unrecoverable;
3087             break;
3088         }
3089     }
3090     free_parser(&p);
3091
3092     return result;
3093 }
3094
3095 #ifdef BUILD_TESTS
3096 /*
3097  * Compile a file like the rb_compile_file function but printing
3098  * things directly to the stdout. This function is used for the tests.
3099  */
3100 KDEVRUBYPARSER_EXPORT int rb_debug_file(struct options_t *opts);
3101
3102 int rb_debug_file(struct options_t *opts)
3103 {
3104     struct parser_t p;
3105     int index;
3106
3107     /* Set up parser */
3108     init_parser(&p);
3109     p.version = opts->version;
3110     if (!retrieve_source(&p, opts->path))
3111         return 0;
3112
3113     printf("Resulting AST's:");
3114     for (;;) {
3115         printf("\n");
3116         yyparse(&p);
3117         print_node(p.ast);
3118         if (p.ast != NULL) {
3119             if (p.ast->cond != NULL) {
3120                 printf("\nCondition: ");
3121                 print_node(p.ast->cond);
3122             }
3123             if (p.ast->l != NULL && p.ast->l->ensure != NULL) {
3124                 if (p.ast->l->cond != NULL) {
3125                     printf("\nCondition: ");
3126                     print_node(p.ast->l->cond);
3127                 }
3128                 printf("\nEnsure: ");
3129                 print_node(p.ast->l->ensure);
3130             }
3131             free_ast(p.ast);
3132             p.ast = NULL;
3133         }
3134         if (p.eof_reached) {
3135             if (p.errors)
3136                 print_errors(p.errors);
3137             break;
3138         }
3139     }
3140
3141     /* Check that all the stacks are empty */
3142     for (index = 0; index < p.sp; index++)
3143         printf("\nS: %s", p.stack[index]);
3144     printf("\n");
3145
3146     for (index = 0; index < p.pos_size; index++)
3147         printf("\nP: %i:%i", p.pos_stack[index].start_line, p.pos_stack[index].start_col);
3148     printf("\n");
3149     free_parser(&p);
3150     return 1;
3151 }
3152 #endif