Parent

RubyLexer

Constants

ESC_RE
EOF
STR_FUNC_BORING

ruby constants for strings (should this be moved somewhere else?)

STR_FUNC_ESCAPE
STR_FUNC_EXPAND
STR_FUNC_REGEXP
STR_FUNC_AWORDS
STR_FUNC_SYMBOL
STR_FUNC_INDENT
STR_SQUOTE
STR_DQUOTE
STR_XQUOTE
STR_REGEXP
STR_SSYM
STR_DSYM
TOKENS

Attributes

command_start[RW]
cmdarg[RW]
cond[RW]
nest[RW]
lex_state[R]

Additional context surrounding tokens that both the lexer and grammar use.

lex_strterm[RW]
parser[RW]
src[R]

Stream of data that yylex examines.

token[RW]

Last token read via yylex.

string_buffer[RW]
yacc_value[RW]

Value of last token which had a value associated with it.

warnings[RW]

What handles warnings

lineno[W]

Public Class Methods

new() click to toggle source
     # File lib/ruby_lexer.rb, line 219
# Creates a lexer with fresh :cond and :cmdarg state stacks, a nesting
# depth of zero and an empty comment buffer, then hands over to #reset
# to clear the remaining per-parse state.
def initialize
  self.cond   = RubyParser::StackState.new(:cond)
  self.cmdarg = RubyParser::StackState.new(:cmdarg)
  self.nest   = 0
  @comments   = []
  reset
end

Public Instance Methods

advance() click to toggle source

How the parser advances to the next token.

@return true if not at end of file (EOF).

    # File lib/ruby_lexer.rb, line 67
# Pulls the next token from #yylex and records it in +token+.
#
# Returns true while the token is not RubyLexer::EOF, false once it is.
# Raises RuntimeError if #yylex hands back nil/false (the token has
# already been stored at that point, as in the original flow).
def advance
  self.token = fetched = yylex

  raise "yylex returned nil" unless fetched

  RubyLexer::EOF != fetched
end
arg_ambiguous() click to toggle source
    # File lib/ruby_lexer.rb, line 76
# Reports, through #warning, that the first argument of a call is
# ambiguous. Returns whatever #warning returns.
def arg_ambiguous
  note = "Ambiguous first argument. make sure."
  self.warning(note)
end
comments() click to toggle source
    # File lib/ruby_lexer.rb, line 80
# Returns all comment text collected so far as a single string and
# empties the collection, so each comment is handed out only once.
def comments
  @comments.join.tap { @comments.clear }
end
expr_beg_push(val) click to toggle source
    # File lib/ruby_lexer.rb, line 86
# Pushes +false+ onto both the cond and cmdarg stacks, switches the
# lexer to :expr_beg and stores +val+ as the current yacc_value.
# Returns +val+.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }
  self.lex_state  = :expr_beg
  self.yacc_value = val
end
fix_arg_lex_state() click to toggle source
    # File lib/ruby_lexer.rb, line 93
# Moves the lexer to :expr_arg when it is currently in :expr_fname or
# :expr_dot, and to :expr_beg from any other state.
def fix_arg_lex_state
  after_name_or_dot = lex_state == :expr_fname || lex_state == :expr_dot
  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
heredoc(here) click to toggle source
     # File lib/ruby_lexer.rb, line 101
101:   def heredoc here # 63 lines
         # Lexes one chunk of a here-document body.
         #
         # +here+ is destructured as [_, eos, func, last_line]; the same
         # four-element shape is written back to lex_strterm at the bottom
         # of this method. +eos+ is the terminator text and +func+ a bit
         # mask tested against STR_FUNC_INDENT and STR_FUNC_EXPAND.
         #
         # Returns :tSTRING_END when the terminator line is found at the
         # start of a line, :tSTRING_DVAR / :tSTRING_DBEG when an
         # interpolation begins, and :tSTRING_CONTENT (text in yacc_value)
         # otherwise. Calls rb_compile_error when input runs out first.
102:     _, eos, func, last_line = here
103: 
104:     indent  = (func & STR_FUNC_INDENT) != 0
105:     expand  = (func & STR_FUNC_EXPAND) != 0
         # With the indent flag the terminator may be preceded by blanks/tabs.
106:     eos_re  = indent ? /[ \t]*#{eos}(\r?\n|\z)/ : /#{eos}(\r?\n|\z)/
107:     err_msg = "can't match #{eos_re.inspect} anywhere in "
108: 
109:     rb_compile_error err_msg if
110:       src.eos?
111: 
112:     if src.beginning_of_line? && src.scan(eos_re) then
           # Terminator reached: push last_line back onto the input before
           # reporting the end of the string.
113:       src.unread_many last_line # TODO: figure out how to remove this
114:       self.yacc_value = eos
115:       return :tSTRING_END
116:     end
117: 
118:     self.string_buffer = []
119: 
120:     if expand then
121:       case
122:       when src.scan(/#[$@]/) then
             # Step back one character so the "$"/"@" is scanned again by
             # whatever lexes the interpolated variable.
123:         src.pos -= 1 # FIX omg stupid
124:         self.yacc_value = src.matched
125:         return :tSTRING_DVAR
126:       when src.scan(/#[{]/) then
127:         self.yacc_value = src.matched
128:         return :tSTRING_DBEG
129:       when src.scan(/#/) then
             # A lone "#" is ordinary text.
130:         string_buffer << '#'
131:       end
132: 
           # Accumulate content line by line until the terminator is consumed.
133:       until src.scan(eos_re) do
134:         c = tokadd_string func, "\n", nil
135: 
136:         rb_compile_error err_msg if
137:           c == RubyLexer::EOF
138: 
139:         if c != "\n" then
               # tokadd_string stopped before the end of the line; return
               # what has been gathered so far, with carriage returns removed.
140:           self.yacc_value = string_buffer.join.delete("\r")
141:           return :tSTRING_CONTENT
142:         else
143:           string_buffer << src.scan(/\n/)
144:         end
145: 
146:         rb_compile_error err_msg if
147:           src.eos?
148:       end
149: 
150:       # tack on a NL after the heredoc token - FIX NL should not be needed
151:       src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
152:     else
           # Non-expanding heredoc: copy raw lines; check (not scan) leaves
           # the terminator line in the input.
153:       until src.check(eos_re) do
154:         string_buffer << src.scan(/.*(\n|\z)/)
155:         rb_compile_error err_msg if
156:           src.eos?
157:       end
158:     end
159: 
160:     self.lex_strterm = [:heredoc, eos, func, last_line]
161:     self.yacc_value = string_buffer.join.delete("\r")
162: 
163:     return :tSTRING_CONTENT
164:   end
heredoc_identifier() click to toggle source
     # File lib/ruby_lexer.rb, line 166
# Lexes the identifier that follows a here-document opener and arms the
# lexer (via lex_strterm) to read the heredoc body.
#
# Recognised forms are a quoted identifier ('EOS', "EOS" or `EOS`) and a
# bare word, each optionally preceded by "-", which adds STR_FUNC_INDENT
# to the string function mask.
#
# When the rest of the current line ends in a newline, that remainder is
# cut out of the source string (replaced by a single "\n"), remembered as
# the fourth element of lex_strterm, and src.extra_lines_added is bumped.
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for every
# other identifier, and nil when no identifier is present. An opening
# quote without a matching close is reported through rb_compile_error.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then
    # Quoted identifier: \2 requires the same quote character to close it.
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    # An opening quote that the first pattern could not close.
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then
    # Bare word identifiers behave like double-quoted ones.
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.extra_lines_added += 1
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    :tXSTRING_BEG
  else
    self.yacc_value = "\""
    :tSTRING_BEG
  end
end
int_with_base(base) click to toggle source
     # File lib/ruby_lexer.rb, line 228
# Converts the text most recently matched by +src+ to an Integer in the
# given +base+, stores it in yacc_value and returns :tINTEGER.
# A doubled underscore in the matched text is reported through
# rb_compile_error.
def int_with_base base
  digits = src.matched
  rb_compile_error "Invalid numeric format" if digits =~ /__/
  self.yacc_value = digits.to_i(base)
  :tINTEGER
end
lex_state=(o) click to toggle source
     # File lib/ruby_lexer.rb, line 234
# Sets the lexer state. Only Symbols are accepted; anything else raises
# a RuntimeError.
def lex_state= o
  case o
  when Symbol then @lex_state = o
  else raise "wtf\?"
  end
end
lineno() click to toggle source
     # File lib/ruby_lexer.rb, line 240
# Returns the cached line number, asking +src+ for its current line
# (and caching the answer) when nothing truthy is cached yet.
def lineno
  return @lineno if @lineno

  @lineno = src.lineno
end
parse_number() click to toggle source
 Parse a number from the input stream.

@param c The first character of the number (stale: the method listed below takes no arguments). @return An int constant which represents a token.

     # File lib/ruby_lexer.rb, line 250
# Lexes a numeric literal at the current scan position.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tINTEGER (via int_with_base) or :tFLOAT. Malformed literals
# are reported through rb_compile_error.
#
# The radix prefixes and hex digits are matched case-insensitively, so
# 0XFF, 0xFF, 0B101 and 0D10 lex like their lower-case forms; Ruby accepts
# both cases and String#to_i understands either prefix.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i) then
    # A radix prefix with no digits after it.
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i) then
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/) then
    number = src.matched
    rb_compile_error "Invalid numeric format" if number =~ /__/
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
parse_quote() click to toggle source
     # File lib/ruby_lexer.rb, line 284
# Lexes the opener of a %-literal (the "%" itself has already been
# consumed) and arms lex_strterm for the string body.
#
# Long-hand literals carry a one-letter type (%Q{ %q{ %W[ %w[ %x{ %r{
# %s{ ); short-hand literals (%{, %!, ...) behave like %Q. For bracket
# delimiters the matching closer becomes the terminator and the opener
# is kept for nesting; for any other delimiter the terminator is the
# delimiter itself and "\0" is stored as the (absent) opener.
#
# Returns the token type for the literal and leaves the literal's
# opening text in yacc_value. Unknown types, two-character types and
# literals cut off by end of input go through rb_compile_error.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  # Case-insensitive so the upper-case types (%Q, %W) reach the long-hand
  # branch; without /i the "Q"/"W" would be taken as the delimiter itself.
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  # Default opening text; the 'Q' branch below overrides it.
  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  token_type
end
parse_string(quote) click to toggle source
     # File lib/ruby_lexer.rb, line 333
333:   def parse_string(quote) # 65 lines
         # Lexes the next piece of a string-like literal described by
         # +quote+, which is destructured as [_, string_type, term, open].
         #
         # Returns :tSTRING_END / :tREGEXP_END when the terminator is read
         # at nesting depth zero, :tSPACE between the words of a word
         # list, :tSTRING_DVAR / :tSTRING_DBEG when an interpolation
         # starts, and :tSTRING_CONTENT (text in yacc_value) otherwise.
         # Running out of input goes through rb_compile_error.
334:     _, string_type, term, open = quote
335: 
336:     space = false # FIX: remove these
337:     func = string_type
338:     paren = open
339:     term_re = Regexp.escape term
340: 
341:     awords = (func & STR_FUNC_AWORDS) != 0
342:     regexp = (func & STR_FUNC_REGEXP) != 0
343:     expand = (func & STR_FUNC_EXPAND) != 0
344: 
         # NOTE(review): func can be nil here - quote[1] is set to nil
         # below once the terminator of a word list has been consumed, so
         # the following call returns :tSTRING_END from this guard.
345:     unless func then # FIX: impossible, prolly needs == 0
346:       self.lineno = nil
347:       return :tSTRING_END
348:     end
349: 
         # In a word list, leading whitespace separates words.
350:     space = true if awords and src.scan(/\s+/)
351: 
352:     if self.nest == 0 && src.scan(/#{term_re}/) then
353:       if awords then
             # Emit one last :tSPACE and mark the quote as finished.
354:         quote[1] = nil
355:         return :tSPACE
356:       elsif regexp then
357:         self.yacc_value = self.regx_options
358:         self.lineno = nil
359:         return :tREGEXP_END
360:       else
361:         self.yacc_value = term
362:         self.lineno = nil
363:         return :tSTRING_END
364:       end
365:     end
366: 
367:     if space then
368:       return :tSPACE
369:     end
370: 
371:     self.string_buffer = []
372: 
373:     if expand
374:       case
375:       when src.scan(/#(?=[$@])/) then
             # Only the "#" is consumed; the "$"/"@" stays in the input.
376:         return :tSTRING_DVAR
377:       when src.scan(/#[{]/) then
378:         return :tSTRING_DBEG
379:       when src.scan(/#/) then
             # A lone "#" is ordinary text.
380:         string_buffer << '#'
381:       end
382:     end
383: 
384:     if tokadd_string(func, term, paren) == RubyLexer::EOF then
385:       rb_compile_error "unterminated string meets end of file"
386:     end
387: 
388:     self.yacc_value = string_buffer.join
389: 
390:     return :tSTRING_CONTENT
391:   end
process_token(command_state) click to toggle source
      # File lib/ruby_lexer.rb, line 1211
1211:   def process_token(command_state)
          # Classifies the text currently held in +token+ (a variable,
          # identifier, constant or keyword), updates lex_state and
          # yacc_value accordingly, and returns the token type.
          #
          # +command_state+ selects :expr_cmdarg over :expr_arg for the
          # state that follows a plain identifier (see the end of the
          # method).
1212: 
          # Absorb a trailing "!" or "?" (unless it is followed by "=")
          # into tokens that start with a word character.
1213:     token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)
1214: 
1215:     result = nil
1216:     last_state = lex_state
1217: 
1218: 
1219:     case token
1220:     when /^\$/ then
1221:       self.lex_state, result = :expr_end, :tGVAR
1222:     when /^@@/ then
1223:       self.lex_state, result = :expr_end, :tCVAR
1224:     when /^@/ then
1225:       self.lex_state, result = :expr_end, :tIVAR
1226:     else
1227:       if token =~ /[!?]$/ then
1228:         result = :tFID
1229:       else
1230:         if lex_state == :expr_fname then
1231:           # ident=, not =~ => == or followed by =>
1232:           # TODO test lexing of a=>b vs a==>b
1233:           if src.scan(/=(?:(?![~>=])|(?==>))/) then
1234:             result = :tIDENTIFIER
1235:             token << src.matched
1236:           end
1237:         end
1238: 
            # Leading upper-case letter => constant, anything else =>
            # identifier (unless already decided above).
1239:         result ||= if token =~ /^[A-Z]/ then
1240:                      :tCONSTANT
1241:                    else
1242:                      :tIDENTIFIER
1243:                    end
1244:       end
1245: 
          # After a "." the keyword lookup is skipped entirely.
1246:       unless lex_state == :expr_dot then
1247:         # See if it is a reserved word.
1248:         keyword = RubyParser::Keyword.keyword token
1249: 
1250:         if keyword then
1251:           state           = lex_state
1252:           self.lex_state  = keyword.state
1253:           self.yacc_value = [token, src.lineno]
1254: 
              # In :expr_fname the keyword's name becomes the value and
              # id0 is returned.
1255:           if state == :expr_fname then
1256:             self.yacc_value = keyword.name
1257:             return keyword.id0
1258:           end
1259: 
              # "do" maps to one of three token types depending on the
              # cond/cmdarg stacks and the state before the keyword.
1260:           if keyword.id0 == :kDO then
1261:             self.command_start = true
1262:             return :kDO_COND  if cond.is_in_state
1263:             return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
1264:             return :kDO_BLOCK if state == :expr_endarg
1265:             return :kDO
1266:           end
1267: 
1268:           return keyword.id0 if state == :expr_beg or state == :expr_value
1269: 
              # id1 is returned from here on; when it differs from id0
              # the lexer is put back into :expr_beg.
1270:           self.lex_state = :expr_beg if keyword.id0 != keyword.id1
1271: 
1272:           return keyword.id1
1273:         end
1274:       end
1275: 
1276:       if (lex_state == :expr_beg || lex_state == :expr_mid ||
1277:           lex_state == :expr_dot || lex_state == :expr_arg ||
1278:           lex_state == :expr_cmdarg) then
1279:         if command_state then
1280:           self.lex_state = :expr_cmdarg
1281:         else
1282:           self.lex_state = :expr_arg
1283:         end
1284:       else
1285:         self.lex_state = :expr_end
1286:       end
1287:     end
1288: 
1289:     self.yacc_value = token
1290: 
1291: 
          # A name the parser environment records as :lvar forces
          # :expr_end, except when the token followed a ".".
1292:     self.lex_state = :expr_end if
1293:       last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar
1294: 
1295:     return result
1296:   end
rb_compile_error(msg) click to toggle source
     # File lib/ruby_lexer.rb, line 393
# Raises SyntaxError with +msg+ extended by the current line number and
# the (inspected) remainder of the line being scanned.
def rb_compile_error msg
  line    = self.lineno
  context = src.rest[/^.*/].inspect
  raise SyntaxError, msg + ". near line #{line}: #{context}"
end
read_escape() click to toggle source
     # File lib/ruby_lexer.rb, line 398
# Decodes the escape sequence at the scan position and returns the
# string it denotes. The leading backslash is expected to have been
# consumed already (both callers in this file scan it first).
#
# Handles the single-letter escapes, octal (\NNN) and hex (\xNN)
# constants, and the meta/control forms \M-x, \C-x, \cx including
# nested combinations such as \M-\C-x. An incomplete meta/control/hex
# escape, or end of input, is reported through rb_compile_error; any
# other character is returned unchanged.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\v"
  when src.scan(/a/) then                   # alarm(bell)
    "\a"
  when src.scan(/e/) then                   # escape
    "\e"
  when src.scan(/b/) then                   # backspace
    "\b"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr            # set the meta (high) bit
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr            # mask down to a control code
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
regx_options() click to toggle source
     # File lib/ruby_lexer.rb, line 451
# Scans the run of lower-case letters that follows a regexp terminator
# and returns the recognised option letters (i x m o n e s u) as a
# string. Any other letter is reported through rb_compile_error.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  known, unknown = letters.partition { |s| s =~ /^[ixmonesu]$/ }

  unless unknown.empty? then
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" %
                     [plural, unknown.join.inspect])
  end

  known.join
end
reset() click to toggle source
     # File lib/ruby_lexer.rb, line 466
# Returns the lexer to its initial per-parse state: the next token
# starts a command, no string terminator / token / value is pending,
# and the source scanner and lex state are cleared. Returns nil.
def reset
  self.command_start = true
  # Chained assignment runs right to left: lex_strterm, token, yacc_value.
  self.yacc_value = self.token = self.lex_strterm = nil

  @src = nil
  @lex_state = nil
end
src=(src) click to toggle source
     # File lib/ruby_lexer.rb, line 476
# Installs the text to be lexed, wrapping it in an RPStringScanner.
# Anything that is not a String raises a RuntimeError.
def src= src
  case src
  when String then @src = RPStringScanner.new(src)
  else raise "bad src: #{src.inspect}"
  end
end
tokadd_escape(term) click to toggle source
     # File lib/ruby_lexer.rb, line 481
# Copies one backslash escape from the input into string_buffer without
# decoding it (the backslash is kept). A backslash-newline pair is
# dropped; a meta/control prefix that is itself followed by a backslash
# recurses to pick up the nested escape. Incomplete \M, \C, \c or \x
# forms, or no escape at all, are reported through rb_compile_error.
# +term+ is only passed along to the recursive call.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    self.string_buffer << src.matched
    self.tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/) then
    self.string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
tokadd_string(func, term, paren) click to toggle source
     # File lib/ruby_lexer.rb, line 501
# Accumulates string content into string_buffer until something that the
# caller must handle is reached.
#
# +func+ is a bit mask tested against the STR_FUNC_* flags, +term+ the
# terminator and +paren+ the opening delimiter (nil, or "\0" when the
# literal has no distinct opener). Occurrences of +paren+ / +term+ raise
# and lower +nest+; scanning stops, with the stopping character left in
# the input, at the terminator on nesting depth zero, at whitespace in a
# word list, or at the "#" of an interpolation.
#
# Returns the last piece of text matched, or RubyLexer::EOF when the
# input is exhausted.
def tokadd_string(func, term, paren) # 105 lines
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when awords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when awords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        # Regexp escapes are copied verbatim, not decoded.
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        # Keep the backslash unless it escaped a delimiter.
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then
      # Plain text: take everything up to the next character that needs
      # one of the branches above.
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if awords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  c
end
unescape(s) click to toggle source
     # File lib/ruby_lexer.rb, line 587
# Translates the body of a backslash escape into the string it denotes.
#
# +s+ is looked up in the table of fixed escapes first (keys such as
# "n", "t", "\\" and "C-?" - presumably the escape text without its
# leading backslash; confirm against ESC_RE at the call site). Otherwise
# octal (NNN), hex (xNN), meta (M-x) and control (C-x / cx) forms are
# decoded; an incomplete form is reported through rb_compile_error and
# anything else is returned unchanged.
def unescape s
  r = {
    "a"    => "\a",
    "b"    => "\b",
    "e"    => "\e",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\v",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    Regexp.last_match(0).to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    Regexp.last_match(1).to_i(16).chr
  when /^M-(.)/ then
    (Regexp.last_match(1)[0].ord | 0x80).chr   # set the meta (high) bit
  when /^(C-|c)(.)/ then
    (Regexp.last_match(2)[0].ord & 0x9f).chr   # mask down to a control code
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
warning(s) click to toggle source
     # File lib/ruby_lexer.rb, line 623
# Sink for lexer warnings. The message +s+ is currently discarded and
# nil is returned.
def warning s
  nil # do nothing for now
end
yylex() click to toggle source

Returns the next token. Also sets yacc_value (yy_val) if needed.

@return The type of the next token (a Symbol such as :tNL), or RubyLexer::EOF at the end of the input.

      # File lib/ruby_lexer.rb, line 632
 632:   def yylex # 826 lines
 633: 
 634:     c = ''
 635:     space_seen = false
 636:     command_state = false
 637:     src = self.src
 638: 
 639:     self.token = nil
 640:     self.yacc_value = nil
 641: 
 642:     return yylex_string if lex_strterm
 643: 
 644:     command_state = self.command_start
 645:     self.command_start = false
 646: 
 647:     last_state = lex_state
 648: 
 649:     loop do # START OF CASE
 650:       if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
 651:         space_seen = true
 652:         next
 653:       elsif src.check(/[^a-zA-Z]/) then
 654:         if src.scan(/\n|#/) then
 655:           self.lineno = nil
 656:           c = src.matched
 657:           if c == '#' then
 658:             src.pos -= 1
 659: 
 660:             while src.scan(/\s*#.*(\n+|\z)/) do
 661:               @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
 662:             end
 663: 
 664:             if src.eos? then
 665:               return RubyLexer::EOF
 666:             end
 667:           end
 668: 
 669:           # Replace a string of newlines with a single one
 670:           src.scan(/\n+/)
 671: 
 672:           if [:expr_beg, :expr_fname,
 673:               :expr_dot, :expr_class].include? lex_state then
 674:             next
 675:           end
 676: 
 677:           self.command_start = true
 678:           self.lex_state = :expr_beg
 679:           return :tNL
 680:         elsif src.scan(/[\]\)\}]/) then
 681:           cond.lexpop
 682:           cmdarg.lexpop
 683:           self.lex_state = :expr_end
 684:           self.yacc_value = src.matched
 685:           result = {
 686:             ")" => :tRPAREN,
 687:             "]" => :tRBRACK,
 688:             "}" => :tRCURLY
 689:           }[src.matched]
 690:           return result
 691:         elsif src.scan(/\.\.\.?|,|![=~]?/) then
 692:           self.lex_state = :expr_beg
 693:           tok = self.yacc_value = src.matched
 694:           return TOKENS[tok]
 695:         elsif src.check(/\./) then
 696:           if src.scan(/\.\d/) then
 697:             rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
 698:           elsif src.scan(/\./) then
 699:             self.lex_state = :expr_dot
 700:             self.yacc_value = "."
 701:             return :tDOT
 702:           end
 703:         elsif src.scan(/\(/) then
 704:           result = :tLPAREN2
 705:           self.command_start = true
 706: 
 707:           if lex_state == :expr_beg || lex_state == :expr_mid then
 708:             result = :tLPAREN
 709:           elsif space_seen then
 710:             if lex_state == :expr_cmdarg then
 711:               result = :tLPAREN_ARG
 712:             elsif lex_state == :expr_arg then
 713:               warning("don't put space before argument parentheses")
 714:               result = :tLPAREN2
 715:             end
 716:           end
 717: 
 718:           self.expr_beg_push "("
 719: 
 720:           return result
 721:         elsif src.check(/\=/) then
 722:           if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
 723:             self.fix_arg_lex_state
 724:             tok = self.yacc_value = src.matched
 725:             return TOKENS[tok]
 726:           elsif src.scan(/\=begin(?=\s)/) then
 727:             # @comments << '=' << src.matched
 728:             @comments << src.matched
 729: 
 730:             unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/) then
 731:               @comments.clear
 732:               rb_compile_error("embedded document meets end of file")
 733:             end
 734: 
 735:             @comments << src.matched
 736: 
 737:             next
 738:           else
 739:             raise "you shouldn't be able to get here"
 740:           end
 741:         elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
 742:           self.yacc_value = src.matched[1..2].gsub(ESC_RE) { unescape $1 }
 743:           self.lex_state = :expr_end
 744:           return :tSTRING
 745:         elsif src.scan(/\"/) then # FALLBACK
 746:           self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this
 747:           self.yacc_value = "\""
 748:           return :tSTRING_BEG
 749:         elsif src.scan(/\@\@?\w*/) then
 750:           self.token = src.matched
 751: 
 752:           rb_compile_error "`#{token}` is not allowed as a variable name" if
 753:             token =~ /\@\d/
 754: 
 755:           return process_token(command_state)
 756:         elsif src.scan(/\:\:/) then
 757:           if (lex_state == :expr_beg ||
 758:               lex_state == :expr_mid ||
 759:               lex_state == :expr_class ||
 760:               (lex_state.is_argument && space_seen)) then
 761:             self.lex_state = :expr_beg
 762:             self.yacc_value = "::"
 763:             return :tCOLON3
 764:           end
 765: 
 766:           self.lex_state = :expr_dot
 767:           self.yacc_value = "::"
 768:           return :tCOLON2
 769:         elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(/:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then
 770:           self.yacc_value = src[1]
 771:           self.lex_state = :expr_end
 772:           return :tSYMBOL
 773:         elsif src.scan(/\:/) then
 774:           # ?: / then / when
 775:           if (lex_state == :expr_end || lex_state == :expr_endarg||
 776:               src.check(/\s/)) then
 777:             self.lex_state = :expr_beg
 778:             self.yacc_value = ":"
 779:             return :tCOLON
 780:           end
 781: 
 782:           case
 783:           when src.scan(/\'/) then
 784:             self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""]
 785:           when src.scan(/\"/) then
 786:             self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""]
 787:           end
 788: 
 789:           self.lex_state = :expr_fname
 790:           self.yacc_value = ":"
 791:           return :tSYMBEG
 792:         elsif src.check(/[0-9]/) then
 793:           return parse_number
 794:         elsif src.scan(/\[/) then
 795:           result = src.matched
 796: 
 797:           if lex_state == :expr_fname || lex_state == :expr_dot then
 798:             self.lex_state = :expr_arg
 799:             case
 800:             when src.scan(/\]\=/) then
 801:               self.yacc_value = "[]="
 802:               return :tASET
 803:             when src.scan(/\]/) then
 804:               self.yacc_value = "[]"
 805:               return :tAREF
 806:             else
 807:               rb_compile_error "unexpected '['"
 808:             end
 809:           elsif lex_state == :expr_beg || lex_state == :expr_mid then
 810:             result = :tLBRACK
 811:           elsif lex_state.is_argument && space_seen then
 812:             result = :tLBRACK
 813:           end
 814: 
 815:           self.expr_beg_push "["
 816: 
 817:           return result
 818:         elsif src.scan(/\'(\\.|[^\'])*\'/) then
 819:           self.yacc_value = src.matched[1..2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
 820:           self.lex_state = :expr_end
 821:           return :tSTRING
 822:         elsif src.check(/\|/) then
 823:           if src.scan(/\|\|\=/) then
 824:             self.lex_state = :expr_beg
 825:             self.yacc_value = "||"
 826:             return :tOP_ASGN
 827:           elsif src.scan(/\|\|/) then
 828:             self.lex_state = :expr_beg
 829:             self.yacc_value = "||"
 830:             return :tOROP
 831:           elsif src.scan(/\|\=/) then
 832:             self.lex_state = :expr_beg
 833:             self.yacc_value = "|"
 834:             return :tOP_ASGN
 835:           elsif src.scan(/\|/) then
 836:             self.fix_arg_lex_state
 837:             self.yacc_value = "|"
 838:             return :tPIPE
 839:           end
 840:         elsif src.scan(/\{/) then
 841:           result = if lex_state.is_argument || lex_state == :expr_end then
 842:                      :tLCURLY      #  block (primary)
 843:                    elsif lex_state == :expr_endarg then
 844:                      :tLBRACE_ARG  #  block (expr)
 845:                    else
 846:                      :tLBRACE      #  hash
 847:                    end
 848: 
 849:           self.expr_beg_push "{"
 850:           self.command_start = true unless result == :tLBRACE
 851: 
 852:           return result
 853:         elsif src.scan(/[+-]/) then
 854:           sign = src.matched
 855:           utype, type = if sign == "+" then
 856:                           [:tUPLUS, :tPLUS]
 857:                         else
 858:                           [:tUMINUS, :tMINUS]
 859:                         end
 860: 
 861:           if lex_state == :expr_fname || lex_state == :expr_dot then
 862:             self.lex_state = :expr_arg
 863:             if src.scan(/@/) then
 864:               self.yacc_value = "#{sign}@"
 865:               return utype
 866:             else
 867:               self.yacc_value = sign
 868:               return type
 869:             end
 870:           end
 871: 
 872:           if src.scan(/\=/) then
 873:             self.lex_state = :expr_beg
 874:             self.yacc_value = sign
 875:             return :tOP_ASGN
 876:           end
 877: 
 878:           if (lex_state == :expr_beg || lex_state == :expr_mid ||
 879:               (lex_state.is_argument && space_seen && !src.check(/\s/))) then
 880:             if lex_state.is_argument then
 881:               arg_ambiguous
 882:             end
 883: 
 884:             self.lex_state = :expr_beg
 885:             self.yacc_value = sign
 886: 
 887:             if src.check(/\d/) then
 888:               if utype == :tUPLUS then
 889:                 return self.parse_number
 890:               else
 891:                 return :tUMINUS_NUM
 892:               end
 893:             end
 894: 
 895:             return utype
 896:           end
 897: 
 898:           self.lex_state = :expr_beg
 899:           self.yacc_value = sign
 900:           return type
 901:         elsif src.check(/\*/) then
 902:           if src.scan(/\*\*=/) then
 903:             self.lex_state = :expr_beg
 904:             self.yacc_value = "**"
 905:             return :tOP_ASGN
 906:           elsif src.scan(/\*\*/) then
 907:             self.yacc_value = "**"
 908:             self.fix_arg_lex_state
 909:             return :tPOW
 910:           elsif src.scan(/\*\=/) then
 911:             self.lex_state = :expr_beg
 912:             self.yacc_value = "*"
 913:             return :tOP_ASGN
 914:           elsif src.scan(/\*/) then
 915:             result = if lex_state.is_argument && space_seen && src.check(/\S/) then
 916:                        warning("`*' interpreted as argument prefix")
 917:                        :tSTAR
 918:                      elsif lex_state == :expr_beg || lex_state == :expr_mid then
 919:                        :tSTAR
 920:                      else
 921:                        :tSTAR2
 922:                      end
 923:             self.yacc_value = "*"
 924:             self.fix_arg_lex_state
 925: 
 926:             return result
 927:           end
 928:         elsif src.check(/\</) then
 929:           if src.scan(/\<\=\>/) then
 930:             self.fix_arg_lex_state
 931:             self.yacc_value = "<=>"
 932:             return :tCMP
 933:           elsif src.scan(/\<\=/) then
 934:             self.fix_arg_lex_state
 935:             self.yacc_value = "<="
 936:             return :tLEQ
 937:           elsif src.scan(/\<\<\=/) then
 938:             self.fix_arg_lex_state
 939:             self.lex_state = :expr_beg
 940:             self.yacc_value = "\<\<"
 941:             return :tOP_ASGN
 942:           elsif src.scan(/\<\</) then
 943:             if (! [:expr_end,    :expr_dot,
 944:                    :expr_endarg, :expr_class].include?(lex_state) &&
 945:                 (!lex_state.is_argument || space_seen)) then
 946:               tok = self.heredoc_identifier
 947:               if tok then
 948:                 return tok
 949:               end
 950:             end
 951: 
 952:             self.fix_arg_lex_state
 953:             self.yacc_value = "\<\<"
 954:             return :tLSHFT
 955:           elsif src.scan(/\</) then
 956:             self.fix_arg_lex_state
 957:             self.yacc_value = "<"
 958:             return :tLT
 959:           end
 960:         elsif src.check(/\>/) then
 961:           if src.scan(/\>\=/) then
 962:             self.fix_arg_lex_state
 963:             self.yacc_value = ">="
 964:             return :tGEQ
 965:           elsif src.scan(/\>\>=/) then
 966:             self.fix_arg_lex_state
 967:             self.lex_state = :expr_beg
 968:             self.yacc_value = ">>"
 969:             return :tOP_ASGN
 970:           elsif src.scan(/\>\>/) then
 971:             self.fix_arg_lex_state
 972:             self.yacc_value = ">>"
 973:             return :tRSHFT
 974:           elsif src.scan(/\>/) then
 975:             self.fix_arg_lex_state
 976:             self.yacc_value = ">"
 977:             return :tGT
 978:           end
 979:         elsif src.scan(/\`/) then
 980:           self.yacc_value = "`"
 981:           case lex_state
 982:           when :expr_fname then
 983:             self.lex_state = :expr_end
 984:             return :tBACK_REF2
 985:           when :expr_dot then
 986:             self.lex_state = if command_state then
 987:                                :expr_cmdarg
 988:                              else
 989:                                :expr_arg
 990:                              end
 991:             return :tBACK_REF2
 992:           end
 993:           self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""]
 994:           return :tXSTRING_BEG
 995:         elsif src.scan(/\?/) then
 996:           if lex_state == :expr_end || lex_state == :expr_endarg then
 997:             self.lex_state = :expr_beg
 998:             self.yacc_value = "?"
 999:             return :tEH
1000:           end
1001: 
1002:           if src.eos? then
1003:             rb_compile_error "incomplete character syntax"
1004:           end
1005: 
1006:           if src.check(/\s|\v/) then
1007:             unless lex_state.is_argument then
1008:               c2 = { " " => 's',
1009:                     "\n" => 'n',
1010:                     "\t" => 't',
1011:                     "\v" => 'v',
1012:                     "\r" => 'r',
1013:                     "\f" => 'f' }[src.matched]
1014: 
1015:               if c2 then
1016:                 warning("invalid character syntax; use ?\\" + c2)
1017:               end
1018:             end
1019: 
1020:             # ternary
1021:             self.lex_state = :expr_beg
1022:             self.yacc_value = "?"
1023:             return :tEH
1024:           elsif src.check(/\w(?=\w)/) then # ternary, also
1025:             self.lex_state = :expr_beg
1026:             self.yacc_value = "?"
1027:             return :tEH
1028:           end
1029: 
1030:           c = if src.scan(/\\/) then
1031:                 self.read_escape
1032:               else
1033:                 src.getch
1034:               end
1035:           self.lex_state = :expr_end
1036:           self.yacc_value = c[0].ord & 0xff
1037:           return :tINTEGER
1038:         elsif src.check(/\&/) then
1039:           if src.scan(/\&\&\=/) then
1040:             self.yacc_value = "&&"
1041:             self.lex_state = :expr_beg
1042:             return :tOP_ASGN
1043:           elsif src.scan(/\&\&/) then
1044:             self.lex_state = :expr_beg
1045:             self.yacc_value = "&&"
1046:             return :tANDOP
1047:           elsif src.scan(/\&\=/) then
1048:             self.yacc_value = "&"
1049:             self.lex_state = :expr_beg
1050:             return :tOP_ASGN
1051:           elsif src.scan(/&/) then
1052:             result = if lex_state.is_argument && space_seen &&
1053:                          !src.check(/\s/) then
1054:                        warning("`&' interpreted as argument prefix")
1055:                        :tAMPER
1056:                      elsif lex_state == :expr_beg || lex_state == :expr_mid then
1057:                        :tAMPER
1058:                      else
1059:                        :tAMPER2
1060:                      end
1061: 
1062:             self.fix_arg_lex_state
1063:             self.yacc_value = "&"
1064:             return result
1065:           end
1066:         elsif src.scan(/\//) then
1067:           if lex_state == :expr_beg || lex_state == :expr_mid then
1068:             self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
1069:             self.yacc_value = "/"
1070:             return :tREGEXP_BEG
1071:           end
1072: 
1073:           if src.scan(/\=/) then
1074:             self.yacc_value = "/"
1075:             self.lex_state = :expr_beg
1076:             return :tOP_ASGN
1077:           end
1078: 
1079:           if lex_state.is_argument && space_seen then
1080:             unless src.scan(/\s/) then
1081:               arg_ambiguous
1082:               self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
1083:               self.yacc_value = "/"
1084:               return :tREGEXP_BEG
1085:             end
1086:           end
1087: 
1088:           self.fix_arg_lex_state
1089:           self.yacc_value = "/"
1090: 
1091:           return :tDIVIDE
1092:         elsif src.scan(/\^=/) then
1093:           self.lex_state = :expr_beg
1094:           self.yacc_value = "^"
1095:           return :tOP_ASGN
1096:         elsif src.scan(/\^/) then
1097:           self.fix_arg_lex_state
1098:           self.yacc_value = "^"
1099:           return :tCARET
1100:         elsif src.scan(/\;/) then
1101:           self.command_start = true
1102:           self.lex_state = :expr_beg
1103:           self.yacc_value = ";"
1104:           return :tSEMI
1105:         elsif src.scan(/\~/) then
1106:           if lex_state == :expr_fname || lex_state == :expr_dot then
1107:             src.scan(/@/)
1108:           end
1109: 
1110:           self.fix_arg_lex_state
1111:           self.yacc_value = "~"
1112: 
1113:           return :tTILDE
1114:         elsif src.scan(/\\/) then
1115:           if src.scan(/\n/) then
1116:             self.lineno = nil
1117:             space_seen = true
1118:             next
1119:           end
1120:           rb_compile_error "bare backslash only allowed before newline"
1121:         elsif src.scan(/\%/) then
1122:           if lex_state == :expr_beg || lex_state == :expr_mid then
1123:             return parse_quote
1124:           end
1125: 
1126:           if src.scan(/\=/) then
1127:             self.lex_state = :expr_beg
1128:             self.yacc_value = "%"
1129:             return :tOP_ASGN
1130:           end
1131: 
1132:           if lex_state.is_argument && space_seen && ! src.check(/\s/) then
1133:             return parse_quote
1134:           end
1135: 
1136:           self.fix_arg_lex_state
1137:           self.yacc_value = "%"
1138: 
1139:           return :tPERCENT
1140:         elsif src.check(/\$/) then
1141:           if src.scan(/(\$_)(\w+)/) then
1142:             self.lex_state = :expr_end
1143:             self.token = src.matched
1144:             return process_token(command_state)
1145:           elsif src.scan(/\$_/) then
1146:             self.lex_state = :expr_end
1147:             self.token = src.matched
1148:             self.yacc_value = src.matched
1149:             return :tGVAR
1150:           elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
1151:             self.lex_state = :expr_end
1152:             self.yacc_value = src.matched
1153:             return :tGVAR
1154:           elsif src.scan(/\$([\&\`\'\+])/) then
1155:             self.lex_state = :expr_end
1156:             # Explicit reference to these vars as symbols...
1157:             if last_state == :expr_fname then
1158:               self.yacc_value = src.matched
1159:               return :tGVAR
1160:             else
1161:               self.yacc_value = src[1].to_sym
1162:               return :tBACK_REF
1163:             end
1164:           elsif src.scan(/\$([1-9]\d*)/) then
1165:             self.lex_state = :expr_end
1166:             if last_state == :expr_fname then
1167:               self.yacc_value = src.matched
1168:               return :tGVAR
1169:             else
1170:               self.yacc_value = src[1].to_i
1171:               return :tNTH_REF
1172:             end
1173:           elsif src.scan(/\$0/) then
1174:             self.lex_state = :expr_end
1175:             self.token = src.matched
1176:             return process_token(command_state)
1177:           elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
1178:             self.lex_state = :expr_end
1179:             self.yacc_value = "$"
1180:             return "$"
1181:           elsif src.scan(/\$\w+/)
1182:             self.lex_state = :expr_end
1183:             self.token = src.matched
1184:             return process_token(command_state)
1185:           end
1186:         elsif src.check(/\_/) then
1187:           if src.beginning_of_line? && src.scan(/\__END__(\n|\Z)/) then
1188:             self.lineno = nil
1189:             return RubyLexer::EOF
1190:           elsif src.scan(/\_\w*/) then
1191:             self.token = src.matched
1192:             return process_token(command_state)
1193:           end
1194:         end
1195:       end # END OF CASE
1196: 
1197:       if src.scan(/\0004|\0032|\0000/) || src.eos? then # ^D, ^Z, EOF
1198:         return RubyLexer::EOF
1199:       else # alpha check
1200:         if src.scan(/\W/) then
1201:           rb_compile_error "Invalid char #{src.matched.inspect} in expression"
1202:         end
1203:       end
1204: 
1205:       self.token = src.matched if self.src.scan(/\w+/)
1206: 
1207:       return process_token(command_state)
1208:     end
1209:   end
yylex_string() click to toggle source
      # File lib/ruby_lexer.rb, line 1298
1298:   def yylex_string # Lex the next token while a string terminator (lex_strterm) is active; returns that token.
1299:     token = if lex_strterm[0] == :heredoc then # element 0 tags the terminator kind (:heredoc vs. e.g. :strterm)
1300:               self.heredoc lex_strterm
1301:             else
1302:               self.parse_string lex_strterm
1303:             end
1304:     # An end-of-literal token closes the literal: clear the terminator and switch to :expr_end.
1305:     if token == :tSTRING_END || token == :tREGEXP_END then
1306:       self.lineno      = nil # NOTE(review): presumably invalidates a cached line number -- confirm against the lineno reader
1307:       self.lex_strterm = nil # no terminator is active after the closing token
1308:       self.lex_state   = :expr_end
1309:     end
1310: 
1311:     return token # returned as produced by heredoc / parse_string, in every case
1312:   end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.