CodeRay::Scanners::HTML

HTML Scanner

Alias: xhtml

See also: Scanners::XML

Constants

EVENT_ATTRIBUTES
IN_ATTRIBUTE

Public Instance Methods

reset() click to toggle source
    # File lib/coderay/scanners/html.rb, line 61
# Resets the scanner through the superclass implementation, then puts the
# HTML-specific state back to its starting values: the state machine is
# set to :initial and the remembered string-content pattern is cleared.
def reset
  super
  @state = :initial
  @plain_string_content = nil
end

Protected Instance Methods

scan_java_script(encoder, code) click to toggle source
    # File lib/coderay/scanners/html.rb, line 74
# Tokenizes +code+ as JavaScript and sends the tokens to +encoder+.
#
# Does nothing (and returns nil) when +code+ is nil/false or empty.
# The JavaScript scanner is created on first use and kept in
# @java_script_scanner for later calls. Wrapping the output in an :inline
# group is not done here; callers that want one open it themselves.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
  @java_script_scanner.tokenize code, :tokens => encoder
end
scan_tokens(encoder, options) click to toggle source
     # File lib/coderay/scanners/html.rb, line 83
 # Scans the remaining input as HTML and reports every token to +encoder+.
 #
 # encoder - object receiving text_token(text, kind), begin_group(kind)
 #           and end_group(kind) calls.
 # options - Hash. :state selects the state to start in (falls back to
 #           @state). When :keep_state is truthy, the final state and the
 #           current string-content pattern are stored in @state and
 #           @plain_string_content, which the next call reads as its
 #           starting values.
 #
 # Returns +encoder+.
 def scan_tokens encoder, options
   state = options[:state] || @state
   plain_string_content = @plain_string_content
   # in_tag holds 'script' while inside a <script> start tag, otherwise nil.
   # in_attribute holds IN_ATTRIBUTE[name] for the attribute being scanned.
   in_tag = in_attribute = nil

   # When starting inside an attribute string, open the :string group that
   # the closing delimiter (or the end of this method) will close again.
   encoder.begin_group :string if state == :attribute_value_string

   until eos?

     # Whitespace is a :space token everywhere except inside <script> content.
     if state != :in_special_tag && match = scan(/\s+/)
       encoder.text_token match, :space

     else

       case state

       when :initial
         if match = scan(/<!--(?:.*?-->|.*)/)
           encoder.text_token match, :comment
         elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/)
           encoder.text_token match, :doctype
         elsif match = scan(/<\?xml(?:.*?\?>|.*)/)
           encoder.text_token match, :preprocessor
         elsif match = scan(/<\?(?:.*?\?>|.*)/)
           encoder.text_token match, :comment
         elsif match = scan(/<\/[-\w.:]*>?/)
           in_tag = nil
           encoder.text_token match, :tag
         elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/)
           encoder.text_token match, :tag
           # Group 1 is set only for <script; group 2 only if ">" followed.
           in_tag = self[1]
           if self[2]
             state = :in_special_tag if in_tag
           else
             state = :attribute
           end
         elsif match = scan(/[^<>&]+/)
           encoder.text_token match, :plain
         elsif match = scan(/#{ENTITY}/x)
           encoder.text_token match, :entity
         elsif match = scan(/[<>&]/)
           in_tag = nil
           encoder.text_token match, :error
         else
           raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
         end

       when :attribute
         if match = scan(/#{TAG_END}/)
           encoder.text_token match, :tag
           in_attribute = nil
           if in_tag
             state = :in_special_tag
           else
             state = :initial
           end
         elsif match = scan(/#{ATTR_NAME}/)
           in_attribute = IN_ATTRIBUTE[match]
           encoder.text_token match, :attribute_name
           state = :attribute_equal
         else
           in_tag = nil
           encoder.text_token getch, :error
         end

       when :attribute_equal
         if match = scan(/=/)
           encoder.text_token match, :operator
           state = :attribute_value
         elsif check(/#{ATTR_NAME}/) || check(/#{TAG_END}/)
           # Attribute without a value. Only peek at what follows: the
           # :attribute state must scan it itself so that the next attribute
           # name or the tag end is emitted as a token. Consuming it here
           # with scan would drop it from the output.
           state = :attribute
           next
         else
           encoder.text_token getch, :error
           state = :attribute
         end

       when :attribute_value
         if match = scan(/#{ATTR_NAME}/)
           # Unquoted value.
           encoder.text_token match, :attribute_value
           state = :attribute
         elsif match = scan(/["']/)
           if in_attribute == :script
             # Quoted value of a script attribute: scan its content as
             # JavaScript inside an :inline group.
             encoder.begin_group :inline
             encoder.text_token match, :inline_delimiter
             if scan(/javascript:[ \t]*/)
               encoder.text_token matched, :comment
             end
             # Take everything up to the matching quote (or end of input).
             code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
             scan_java_script encoder, code
             match = scan(/["']/)
             encoder.text_token match, :inline_delimiter if match
             encoder.end_group :inline
             state = :attribute
             in_attribute = nil
           else
             encoder.begin_group :string
             state = :attribute_value_string
             # Pattern for plain content depends on the opening quote.
             plain_string_content = PLAIN_STRING_CONTENT[match]
             encoder.text_token match, :delimiter
           end
         elsif match = scan(/#{TAG_END}/)
           # NOTE(review): unlike the :attribute branch, this goes to :initial
           # even when in_tag is set - confirm that this is intended.
           encoder.text_token match, :tag
           state = :initial
         else
           encoder.text_token getch, :error
         end

       when :attribute_value_string
         if match = scan(plain_string_content)
           encoder.text_token match, :content
         elsif match = scan(/['"]/)
           encoder.text_token match, :delimiter
           encoder.end_group :string
           state = :attribute
         elsif match = scan(/#{ENTITY}/x)
           encoder.text_token match, :entity
         elsif match = scan(/&/)
           encoder.text_token match, :content
         elsif match = scan(/[\n>]/)
           # Unterminated string: close the group and report an error.
           encoder.end_group :string
           state = :initial
           encoder.text_token match, :error
         end

       when :in_special_tag
         case in_tag
         when 'script'
           encoder.text_token match, :space if match = scan(/[ \t]*\n/)
           if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/)
             # Script body wrapped in an HTML comment: the wrapper is emitted
             # as :comment, the part in between is scanned as JavaScript.
             code = self[2] || self[4]
             closing = self[3]
             encoder.text_token self[1], :comment
           else
             # Everything up to the closing </script> tag (or end of input).
             code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
             closing = false
           end
           unless code.empty?
             encoder.begin_group :inline
             scan_java_script encoder, code
             encoder.end_group :inline
           end
           encoder.text_token closing, :comment if closing
           state = :initial
         else
           raise 'unknown special tag: %p' % [in_tag]
         end

       else
         raise_inspect 'Unknown state: %p' % [state], encoder

       end

     end

   end

   if options[:keep_state]
     @state = state
     @plain_string_content = plain_string_content
   end

   # Input ended inside an attribute string: close the still open group.
   encoder.end_group :string if state == :attribute_value_string

   encoder
 end
setup() click to toggle source
    # File lib/coderay/scanners/html.rb, line 69
# Puts the scanner into its starting condition: the state machine begins
# in :initial and no string-content pattern is remembered yet.
def setup
  @state = :initial
  @plain_string_content = nil
end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.