Object
Ruby constants for strings (should this be moved somewhere else?)
How the parser advances to the next token.
@return true if not at end of file (EOF).
# File lib/ruby_lexer.rb, line 67
#
# How the parser advances to the next token: runs yylex once and stores
# the result in +token+.
#
# Returns true if the token just read is not RubyLexer::EOF, false
# otherwise. Raises RuntimeError if yylex returns nil.
def advance
  r = yylex
  self.token = r

  raise "yylex returned nil" unless r

  return RubyLexer::EOF != r
end
# File lib/ruby_lexer.rb, line 76
#
# Reports, via #warning, that the first argument of a command call is
# ambiguous (yylex calls this for a sign or slash after a space).
def arg_ambiguous
  warning("Ambiguous first argument. make sure.")
end
# File lib/ruby_lexer.rb, line 86
#
# Bookkeeping for an opening bracket: pushes +false+ onto both the
# +cond+ and +cmdarg+ stacks, switches the lexer to :expr_beg and
# records +val+ (the bracket text) as the current yacc_value.
def expr_beg_push val
  cond.push false
  cmdarg.push false
  self.lex_state = :expr_beg
  self.yacc_value = val
end
# File lib/ruby_lexer.rb, line 93
#
# Chooses the state that follows an operator token: :expr_arg when the
# operator was read in :expr_fname or :expr_dot (i.e. it is being used
# as a method name), :expr_beg in every other state.
def fix_arg_lex_state
  self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
                     :expr_arg
                   else
                     :expr_beg
                   end
end
# File lib/ruby_lexer.rb, line 101
#
# Lexes the next piece of a here document.
#
# +here+ is the lex_strterm array built by heredoc_identifier:
# <tt>[:heredoc, eos, func, last_line]</tt>, where +eos+ is the
# terminator text, +func+ the STR_FUNC_* flag bits and +last_line+ the
# rest of the line that followed the heredoc identifier.
#
# Returns :tSTRING_END once the terminator is found at the start of a
# line, :tSTRING_DVAR / :tSTRING_DBEG when an interpolation starts, and
# :tSTRING_CONTENT (with the text in yacc_value, "\r" removed)
# otherwise. Raises SyntaxError through rb_compile_error when the
# terminator cannot be found before end of input.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # The terminator is literal text, so it must be escaped before being
  # embedded in a regexp (a quoted identifier may hold metacharacters).
  eos_src = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_src}(\r?\n|\z)/ : /#{eos_src}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n" then
        # tokadd_string stopped before the end of the line (e.g. at an
        # interpolation): hand back what has been collected so far.
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-expanding heredoc: copy whole lines until the terminator is
    # next (check leaves it unconsumed for the following call).
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 166
#
# Called right after "<<" has been scanned to decide whether a here
# document starts here.
#
# On success the terminator is stored in lex_strterm as
# <tt>[:heredoc, terminator, func, rest_of_line]</tt>, the remainder of
# the current source line is replaced by a single "\n", and
# :tXSTRING_BEG (backtick-quoted identifier) or :tSTRING_BEG is
# returned. Returns nil when what follows "<<" is not a heredoc
# identifier. Raises SyntaxError through rb_compile_error for a quoted
# identifier that is never closed.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then # quoted: <<"EOS", <<-'EOS', <<`EOS`
    term = src[2]
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then         # bare: <<EOS, <<-EOS
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    # Cut the rest of this line out of the source so the heredoc body
    # comes next; #heredoc puts it back when the terminator is reached.
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.extra_lines_added += 1
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
# File lib/ruby_lexer.rb, line 228
#
# Converts the text last matched by +src+ into an Integer in the given
# +base+, stores it in yacc_value and returns :tINTEGER.
#
# Raises SyntaxError through rb_compile_error when the literal contains
# two consecutive underscores.
def int_with_base base
  rb_compile_error "Invalid numeric format" if src.matched =~ /__/
  self.yacc_value = src.matched.to_i(base)
  return :tINTEGER
end
# File lib/ruby_lexer.rb, line 234
#
# Sets the lexer state. Only Symbols are accepted; anything else raises
# RuntimeError.
def lex_state= o
  raise "wtf\?" unless Symbol === o
  @lex_state = o
end
# File lib/ruby_lexer.rb, line 240
#
# Current line number. The value is read from +src+ on first use and
# cached in @lineno until something clears it.
def lineno
  @lineno ||= src.lineno
end
Parse a number from the input stream.
@param c The first character of the number. @return An int constant which represents a token.
# File lib/ruby_lexer.rb, line 250
#
# Lexes a numeric literal at the current scan position and sets the
# lexer state to :expr_end.
#
# Returns :tINTEGER (via int_with_base) or :tFLOAT with the numeric
# value in yacc_value. Raises SyntaxError through rb_compile_error for
# malformed literals.
#
# The radix prefixes (0x, 0b, 0d) and the float exponent marker are
# matched case-insensitively, so 0XFF and 1E3 are accepted as well.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i) then # prefix with no digits
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/i) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
# File lib/ruby_lexer.rb, line 284
#
# Lexes the opening of a %-literal (the "%" itself has already been
# consumed by yylex): works out the literal type and delimiters, stores
# <tt>[:strterm, string_type, closing_delimiter, opening_delimiter]</tt>
# in lex_strterm and returns the matching *_BEG token.
#
# Raises SyntaxError through rb_compile_error for an unknown literal
# type or when the input ends right after the opening.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  # Case-insensitive so that the upper-case types (%Q, %W) are read as
  # a type letter and not mistaken for the delimiter of a short-hand.
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
# File lib/ruby_lexer.rb, line 333
#
# Lexes the next piece of an ordinary (non-heredoc) string, regexp,
# symbol or word list.
#
# +quote+ is the lex_strterm array
# <tt>[:strterm, string_type, terminator, opening_paren]</tt>.
#
# Returns one of :tSTRING_END, :tREGEXP_END, :tSPACE, :tSTRING_DVAR,
# :tSTRING_DBEG or :tSTRING_CONTENT, setting yacc_value where the code
# below does so. Raises SyntaxError through rb_compile_error when the
# input ends before the terminator.
def parse_string(quote) # 65 lines
  _, string_type, term, open = quote

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # The word-list branch below sets quote[1] to nil after consuming the
  # terminator, so a later call with the same array arrives here with a
  # nil func and closes the literal.
  unless func then
    self.lineno = nil
    return :tSTRING_END
  end

  space = true if awords and src.scan(/\s+/)

  if self.nest == 0 && src.scan(/#{term_re}/) then
    if awords then
      quote[1] = nil
      return :tSPACE
    elsif regexp then
      self.yacc_value = self.regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  if space then
    return :tSPACE
  end

  self.string_buffer = []

  if expand
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 1211
#
# Classifies the identifier-like text currently held in +token+ and
# returns its token type: :tGVAR, :tCVAR, :tIVAR, :tFID, :tCONSTANT,
# :tIDENTIFIER, or a keyword id looked up through RubyParser::Keyword.
# Updates lex_state and yacc_value along the way.
#
# +command_state+ is the command_start flag captured by yylex before
# this token was read; it selects :expr_cmdarg over :expr_arg.
def process_token(command_state)
  # Absorb a trailing "!" or "?" (but not "!=" / "?=") into the name.
  token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)

  result = nil
  last_state = lex_state

  case token
  when /^\$/ then
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/ then
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/ then
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/ then
      result = :tFID
    else
      if lex_state == :expr_fname then
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        if src.scan(/=(?:(?![~>=])|(?==>))/) then
          result = :tIDENTIFIER
          token << src.matched
        end
      end

      result ||= if token =~ /^[A-Z]/ then
                   :tCONSTANT
                 else
                   :tIDENTIFIER
                 end
    end

    unless lex_state == :expr_dot then
      # See if it is a reserved word.
      keyword = RubyParser::Keyword.keyword token

      if keyword then
        state           = lex_state
        self.lex_state  = keyword.state
        self.yacc_value = [token, src.lineno]

        if state == :expr_fname then
          self.yacc_value = keyword.name
          return keyword.id0
        end

        if keyword.id0 == :kDO then
          self.command_start = true
          return :kDO_COND  if cond.is_in_state
          return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
          return :kDO_BLOCK if state == :expr_endarg
          return :kDO
        end

        return keyword.id0 if state == :expr_beg || state == :expr_value

        self.lex_state = :expr_beg if keyword.id0 != keyword.id1

        return keyword.id1
      end
    end

    if (lex_state == :expr_beg || lex_state == :expr_mid ||
        lex_state == :expr_dot || lex_state == :expr_arg ||
        lex_state == :expr_cmdarg) then
      if command_state then
        self.lex_state = :expr_cmdarg
      else
        self.lex_state = :expr_arg
      end
    else
      self.lex_state = :expr_end
    end
  end

  self.yacc_value = token

  # A name the parser environment records as :lvar ends the expression,
  # unless it was read right after a dot.
  self.lex_state = :expr_end if
    last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar

  return result
end
# File lib/ruby_lexer.rb, line 393
#
# Raises SyntaxError with +msg+ followed by the current line number and
# the unread remainder of the current source line.
def rb_compile_error msg
  msg += ". near line #{lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg
end
# File lib/ruby_lexer.rb, line 398
#
# Reads one escape sequence from +src+ (the leading backslash has
# already been consumed by the caller) and returns the string it stands
# for. Handles the single-letter escapes, octal and hex constants and
# the \M-x, \C-x and \cx forms, including nested ones.
#
# Raises SyntaxError through rb_compile_error for an incomplete escape
# or when the input ends.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then        # meta applied to another escape
    src.scan(/M-\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then    # control applied to another escape
    src.scan(/(C-|c)\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
# File lib/ruby_lexer.rb, line 451
#
# Scans the option letters that follow a regexp's closing delimiter and
# returns them joined into one string ("" when there are none).
#
# Raises SyntaxError through rb_compile_error if any scanned letter is
# not one of i, x, m, o, n, e, s, u.
def regx_options # 15 lines
  good, bad = [], []

  if src.scan(/[a-z]+/) then
    good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
  end

  unless bad.empty? then
    rb_compile_error("unknown regexp option%s - %s" %
                     [(bad.size > 1 ? "s" : ""), bad.join.inspect])
  end

  return good.join
end
# File lib/ruby_lexer.rb, line 466
#
# Returns the lexer to its initial state: command_start is set to true;
# lex_strterm, token, yacc_value, the source scanner and the lex state
# are cleared.
def reset
  self.command_start = true
  self.lex_strterm   = nil
  self.token         = nil
  self.yacc_value    = nil

  # Assigned directly: the src= and lex_state= writers reject nil.
  @src       = nil
  @lex_state = nil
end
# File lib/ruby_lexer.rb, line 476
#
# Sets the text to lex. +src+ must be a String; it is wrapped in a new
# RPStringScanner. Anything else raises RuntimeError.
def src= src
  raise "bad src: #{src.inspect}" unless String === src
  @src = RPStringScanner.new(src)
end
# File lib/ruby_lexer.rb, line 481
#
# Copies one backslash escape from +src+ into string_buffer verbatim,
# backslash included (tokadd_string uses this for regexp literals). A
# backslash-newline pair is dropped instead of copied.
#
# +term+ is not inspected here; it is only passed along on recursion.
# Raises SyntaxError through rb_compile_error for an incomplete escape.
def tokadd_escape term # 20 lines
  case
  when src.scan(/\\\n/) then
    # just ignore
  when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    string_buffer << src.matched
  when src.scan(/\\([MC]-|c)(?=\\)/) then
    # \M-, \C- or \c followed by another escape: copy the prefix, then
    # handle the nested escape.
    string_buffer << src.matched
    tokadd_escape term
  when src.scan(/\\([MC]-|c)(.)/) then
    string_buffer << src.matched
  when src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  when src.scan(/\\(.)/) then
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
# File lib/ruby_lexer.rb, line 501
#
# Appends string content from +src+ to string_buffer until the input
# ends or something that ends the current piece is reached: the
# unnested terminator, whitespace inside a word list, or the start of
# an interpolation. In each of those cases the scan position is left on
# the character that stopped the scan.
#
# func::  STR_FUNC_* flag bits describing the literal.
# term::  closing delimiter.
# paren:: opening delimiter for nestable literals, or nil.
#
# Returns RubyLexer::EOF if the input was exhausted, otherwise the last
# piece of text that was handled.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      # Plain text: take the longest run free of delimiters, "#", NUL
      # and backslash (plus newline and space in word lists).
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if awords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
# File lib/ruby_lexer.rb, line 587
#
# Translates the text of one escape sequence, given without its leading
# backslash (e.g. "n", "x41", "C-a"), into the string it stands for.
# Text that is not a recognised escape is returned unchanged.
#
# Raises SyntaxError through rb_compile_error for an incomplete \M, \C,
# \c or \x escape.
def unescape s
  r = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
# File lib/ruby_lexer.rb, line 623
#
# Hook for lexer warnings. +s+ is the warning text; it is currently
# discarded.
def warning s
  # do nothing for now
end
Returns the next token. Also sets yy_val if needed.
@return The type of the token just read, or RubyLexer::EOF at the end of the input.
# File lib/ruby_lexer.rb, line 632
#
# Returns the type of the next token and leaves its value in
# yacc_value. Returns RubyLexer::EOF at the end of the input (also on
# ^D, ^Z, NUL or a line consisting of __END__).
#
# While a string-like literal is open (lex_strterm is set) the work is
# delegated to yylex_string. Otherwise whitespace and comments are
# skipped (comments are collected in @comments) and the token is
# chosen from the upcoming characters together with the current
# lex_state and whether a space preceded it.
#
# Raises SyntaxError through rb_compile_error on malformed input.
def yylex # 826 lines
  c = ''
  space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
      space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.pos -= 1

          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          if src.eos? then
            return RubyLexer::EOF
          end
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        if [:expr_beg, :expr_fname,
            :expr_dot, :expr_class].include? lex_state then
          next
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        cond.lexpop
        cmdarg.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.scan(/\.\.\.?|,|![=~]?/) then
        self.lex_state = :expr_beg
        tok = self.yacc_value = src.matched
        return TOKENS[tok]
      elsif src.check(/\./) then
        if src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\(/) then
        result = :tLPAREN2
        self.command_start = true

        if lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLPAREN
        elsif space_seen then
          if lex_state == :expr_cmdarg then
            result = :tLPAREN_ARG
          elsif lex_state == :expr_arg then
            warning("don't put space before argument parentheses")
            result = :tLPAREN2
          end
        end

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
          self.fix_arg_lex_state
          tok = self.yacc_value = src.matched
          return TOKENS[tok]
        elsif src.scan(/\=begin(?=\s)/) then
          # @comments << '=' << src.matched
          @comments << src.matched

          # /m lets ".*?" run across newlines so that an embedded
          # document of any length is consumed up to its "=end" line.
          unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
            @comments.clear
            rb_compile_error("embedded document meets end of file")
          end

          @comments << src.matched

          next
        else
          raise "you shouldn't be able to get here"
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        # Simple double-quoted string without interpolation: strip the
        # surrounding quotes and resolve the escapes in one go.
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\0"] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?\w*/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if (lex_state == :expr_beg ||
            lex_state == :expr_mid ||
            lex_state == :expr_class ||
            (lex_state.is_argument && space_seen)) then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if (lex_state == :expr_end || lex_state == :expr_endarg ||
            src.check(/\s/)) then
          self.lex_state = :expr_beg
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\0"]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\0"]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        result = src.matched

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif lex_state == :expr_beg || lex_state == :expr_mid then
          result = :tLBRACK
        elsif lex_state.is_argument && space_seen then
          result = :tLBRACK
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        # Single-quoted string: strip the quotes; only \\ and \' are
        # escapes here.
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        result = if lex_state.is_argument || lex_state == :expr_end then
                   :tLCURLY      #  block (primary)
                 elsif lex_state == :expr_endarg then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"
        self.command_start = true unless result == :tLBRACE

        return result
      elsif src.scan(/[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if lex_state == :expr_fname || lex_state == :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (lex_state == :expr_beg || lex_state == :expr_mid ||
            (lex_state.is_argument && space_seen && !src.check(/\s/))) then
          if lex_state.is_argument then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if lex_state.is_argument && space_seen && src.check(/\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          if (! [:expr_end,    :expr_dot,
                 :expr_endarg, :expr_class].include?(lex_state) &&
              (!lex_state.is_argument || space_seen)) then
            tok = self.heredoc_identifier
            if tok then
              return tok
            end
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\0"]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then
        if lex_state == :expr_end || lex_state == :expr_endarg then
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless lex_state.is_argument then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.yacc_value = "?"
          return :tEH
        end

        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end
        self.yacc_value = c[0].ord & 0xff
        return :tINTEGER
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if lex_state.is_argument && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif lex_state == :expr_beg || lex_state == :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\0"]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if lex_state == :expr_fname || lex_state == :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        if src.scan(/\n/) then
          self.lineno = nil
          space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        if lex_state == :expr_beg || lex_state == :expr_mid then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        if lex_state.is_argument && space_seen && ! src.check(/\s/) then
          return parse_quote
        end

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\004|\032|\000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      if src.scan(/\W/) then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    self.token = src.matched if self.src.scan(/\w+/)

    return process_token(command_state)
  end
end
# File lib/ruby_lexer.rb, line 1298
#
# Lexes the next piece of the string-like literal described by
# lex_strterm, using #heredoc for here documents and #parse_string for
# everything else.
#
# When the piece is :tSTRING_END or :tREGEXP_END the literal is over:
# lex_strterm and lineno are cleared and the state becomes :expr_end.
# Returns the token type of the piece.
def yylex_string # 23 lines
  piece = if lex_strterm[0] == :heredoc then
            heredoc lex_strterm
          else
            parse_string lex_strterm
          end

  if piece == :tSTRING_END || piece == :tREGEXP_END then
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  return piece
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.