This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.
Example:
require 'mechanize'
agent = Mechanize.new
agent.get('http://google.com/').class # => Mechanize::Page
# File lib/mechanize/page.rb, line 406
##
# Extracts the value of the charset parameter from +content_type+, a
# Content-Type style string such as "text/html; charset=UTF-8".
#
# The parameter name is matched case-insensitively, so "Charset=" and
# "CHARSET=" are recognised as well.
#
# Returns the charset as a String, or nil when +content_type+ is nil, has
# no charset parameter, or declares the charset "none".
def charset content_type
  return nil unless content_type

  charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1]

  # "none" is a placeholder for "no charset", not a usable encoding name
  return nil if charset && charset.casecmp('none') == 0

  charset
end
Retrieves all charsets from meta tags in body
# File lib/mechanize/page.rb, line 428
##
# Retrieves all charsets declared by meta tags in +body+, in document
# order.
#
# Two forms are recognised:
#
# * a meta tag with a quoted charset attribute, whose value is returned
#   as-is (the match stops at the closing quote, so following attributes
#   are not swallowed);
# * a meta tag with http-equiv="content-type", whose content attribute is
#   passed to ::charset.
#
# Tag and attribute names are matched case-insensitively.  Returns an
# Array of Strings; tags that yield no charset are dropped.
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    if meta =~ /charset\s*=\s*(["'])?\s*(.+?)\s*\1/i
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i
      content = meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]

      charset content if content
    end
  end.compact
end
Retrieves the last content-type set by a meta tag in body
# File lib/mechanize/page.rb, line 446
##
# Retrieves the content-type set by the last meta tag in +body+ that has
# http-equiv="content-type".
#
# Tag and attribute names are matched case-insensitively and whitespace is
# allowed around the "=" of the content attribute.
#
# Returns the value of that tag's content attribute, or nil when +body+
# has no such meta tag or the tag has no content attribute.
def self.meta_content_type body
  # walk backwards so the last declaration in the document wins
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    return meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]
  end

  nil
end
# File lib/mechanize/page.rb, line 27
##
# Creates a page from +uri+, +response+, +body+ and +code+ and gathers the
# candidate encodings that will be offered to the parser.
#
# Candidates are appended to @encodings in this order: nil, the charset
# detected from +body+, the charsets from the response headers, the
# charsets from meta tags in +body+, and finally the default encoding of
# +mech+ when it has one.
#
# Raises a RuntimeError when +mech+ is given but is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  response ||= DEFAULT_RESPONSE

  @meta_content_type = nil
  @encoding          = nil
  @encodings         = [nil]

  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  @encodings << Mechanize::Util.detect_charset(body) if body
  @encodings.concat self.class.response_header_charset(response)

  if body
    # Treat the body as raw bytes so the meta tag regular expressions can
    # run on it; the detected encoding is applied later.
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset(body)

    declared_type = self.class.meta_content_type(body)
    @meta_content_type = declared_type if declared_type
  end

  @encodings << mech.default_encoding if mech && mech.default_encoding

  super uri, response, body, code
end
# File lib/mechanize/page.rb, line 415
##
# Collects the charsets announced by the content-type headers of
# +response+.
#
# Only headers named 'content-type' whose value contains "charset" are
# considered; each such value is passed to ::charset and the result is
# appended to the returned Array.
def self.response_header_charset response
  found = []

  response.each do |name, content|
    found << charset(content) if name == 'content-type' && content =~ /charset/
  end

  found
end
Search through the page for path under namespace using Nokogiri’s #at. The path may be either a CSS or XPath expression.
See also Nokogiri::XML::Node#at
# File lib/mechanize/page.rb, line 212 212: def_delegator :parser, :at, :at
Find a single base tag matching criteria. Example:
page.base_with(:href => /foo/).click
# File lib/mechanize/page.rb, line 259
259:
Return a list of all base tags
# File lib/mechanize/page.rb, line 357
##
# Returns a Base for every element matched by search('base').  The list
# is built on first use and cached in @bases.
def bases
  @bases ||= search('base').map do |element|
    Base.new(element, @mech, self)
  end
end
Find all base tags matching criteria. Example:
page.bases_with(:href => /foo/).each do |base| puts base.href end
# File lib/mechanize/page.rb, line 268 268: elements_with :base
Return the canonical URI for the page if there is a link tag with rel=“canonical” and an href attribute.
# File lib/mechanize/page.rb, line 177
##
# Returns the URI from the href of the first link element that has
# rel="canonical" and an href attribute, or nil when there is no such
# element.
#
# When the href is rejected with URI::InvalidURIError it is escaped with
# Mechanize::Util.uri_escape and parsed again.
def canonical_uri
  canonical = at('link[@rel="canonical"][@href]')
  return unless canonical

  target = canonical['href']

  URI(target)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(target))
end
Get the content type
# File lib/mechanize/page.rb, line 188
##
# Returns the content type of the page: the value stored in
# @meta_content_type when one is set, otherwise the response's
# 'content-type' entry.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
# File lib/mechanize/page.rb, line 74
##
# Returns the result of Mechanize::Util.detect_charset for the page body.
def detected_encoding
  Mechanize::Util.detect_charset body
end
# File lib/mechanize/page.rb, line 94
##
# Returns the parser's encoding, or nil when the parser does not respond
# to #encoding.
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end
# File lib/mechanize/page.rb, line 78
##
# Sets the encoding used the next time the page is parsed and returns
# +encoding+.
#
# #reset drops the cached parser together with the collections derived
# from it, so the parser is rebuilt lazily with the new encoding.  The
# former "if @parser" comparison of old and new encoding ran after #reset
# had already set @parser to nil and could never execute, so it has been
# removed.
def encoding=(encoding)
  reset

  @encoding = encoding

  encoding
end
Return whether the parser result has errors related to encoding. A false result only means that the parser reported no encoding errors; it does not guarantee that the encoding is valid.
# File lib/mechanize/page.rb, line 100
##
# Returns true when any error reported by +parser+ (the page's own parser
# when none is given) has a message containing "indicate encoding",
# "Invalid char" or "input conversion failed"; returns false otherwise,
# including when there are no errors at all.
#
# The pattern is written on a single line: spread over several lines
# without the /x flag, the line breaks and indentation would be part of
# the pattern and the second and third alternatives could not match.
def encoding_error?(parser=nil)
  parser ||= self.parser

  parser.errors.any? do |error|
    error.message =~ /indicate encoding|Invalid char|input conversion failed/
  end
end
Find a single form matching criteria. Example:
page.form_with(:action => '/post/login.php') do |f| ... end
# File lib/mechanize/page.rb, line 223
223:
Return a list of all form tags
# File lib/mechanize/page.rb, line 336
##
# Returns a Mechanize::Form for every element matched by search('form').
# A form without an action gets the page's URI, as a String, as its
# action.  The list is built on first use and cached in @forms.
def forms
  @forms ||= search('form').map do |element|
    Mechanize::Form.new(element, @mech, self).tap do |form|
      form.action ||= @uri.to_s
    end
  end
end
Find all forms matching criteria. Example:
page.forms_with(:action => '/post/login.php').each do |f| ... end
# File lib/mechanize/page.rb, line 232 232: elements_with :form
Find a single frame tag matching criteria. Example:
page.frame_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 277
277:
Return a list of all frame tags
# File lib/mechanize/page.rb, line 364
##
# Returns a Frame for every element matched by search('frame').  The list
# is built on first use and cached in @frames.
def frames
  @frames ||= search('frame').map do |element|
    Frame.new(element, @mech, self)
  end
end
Find all frame tags matching criteria. Example:
page.frames_with(:src => /foo/).each do |frame| p frame.src end
# File lib/mechanize/page.rb, line 286 286: elements_with :frame
Find a single iframe tag matching criteria. Example:
page.iframe_with(:src => /foo/).click
# File lib/mechanize/page.rb, line 295
295:
Return a list of all iframe tags
# File lib/mechanize/page.rb, line 371
##
# Returns a Frame (the same class used by #frames) for every element
# matched by search('iframe').  The list is built on first use and cached
# in @iframes.
def iframes
  @iframes ||= search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end
Find all iframe tags matching criteria. Example:
page.iframes_with(:src => /foo/).each do |iframe| p iframe.src end
# File lib/mechanize/page.rb, line 304 304: elements_with :iframe
# File lib/mechanize/page.rb, line 383
##
# Returns the url of every entry in #images with duplicates removed.  The
# list is built on first use and cached in @image_urls.
def image_urls
  @image_urls ||= images.map { |image| image.url }.uniq
end
Find a single image matching criteria. Example:
page.image_with(:alt => /main/).fetch.save
# File lib/mechanize/page.rb, line 313
313:
Return a list of all img tags
# File lib/mechanize/page.rb, line 378
##
# Returns an Image for every element matched by search('img').  The list
# is built on first use and cached in @images.
def images
  @images ||= search('img').map do |element|
    Image.new(element, self)
  end
end
Find all images matching criteria. Example:
page.images_with(:src => /jpg\Z/).each do |img| img.fetch.save end
# File lib/mechanize/page.rb, line 322 322: elements_with :image
Return a list of all label tags
# File lib/mechanize/page.rb, line 389
##
# Returns a Label for every element matched by search('label').  The list
# is built on first use and cached in @labels.
def labels
  @labels ||= search('label').map do |element|
    Label.new(element, self)
  end
end
# File lib/mechanize/page.rb, line 394
##
# Returns a Hash that maps the 'for' attribute of each label's node to
# the label.  Labels whose #for is nil or false are left out.  The Hash
# is built on first use and cached in @labels_hash.
def labels_hash
  @labels_hash ||= labels.inject({}) do |index, label|
    index[label.node['for']] = label if label.for
    index
  end
end
Find a single link matching criteria. Example:
page.link_with(:href => /foo/).click
# File lib/mechanize/page.rb, line 241
241:
Return a list of all links, built from the a and area tags of the page
# File lib/mechanize/page.rb, line 326
##
# Returns a Link for every element matched by search('a') followed by
# every element matched by search('area').  The list is built on first
# use and cached in @links.
#
# The tag names come from a %w word array so each tag is searched
# separately; a plain %{ a area } literal is a single String rather than
# a list of tags.
def links
  @links ||= %w[a area].map do |tag|
    search(tag).map do |node|
      Link.new(node, @mech, self)
    end
  end.flatten
end
Find all links matching criteria. Example:
page.links_with(:href => /foo/).each do |link| puts link.href end
# File lib/mechanize/page.rb, line 250 250: elements_with :link
# File lib/mechanize/page.rb, line 70
##
# Instance-level shortcut: returns the result of the class method
# ::meta_charset for this page's body.
def meta_charset
  self.class.meta_charset body
end
Return a list of all meta refresh elements
# File lib/mechanize/page.rb, line 347
##
# Returns the results of MetaRefresh.from_node for the page's meta
# elements, with nil results removed.
#
# All meta elements are searched when the agent's follow_meta_refresh is
# :anywhere; otherwise only those matching 'head > meta'.  The list is
# built on first use and cached in @meta_refresh.
def meta_refresh
  selector =
    if @mech.follow_meta_refresh == :anywhere
      'meta'
    else
      'head > meta'
    end

  @meta_refresh ||= search(selector).map { |element|
    MetaRefresh.from_node(element, self)
  }.compact
end
# File lib/mechanize/page.rb, line 110 110: def parser 111: return @parser if @parser 112: return nil unless @body 113: 114: if @encoding then 115: @parser = @mech.html_parser.parse html_body, nil, @encoding 116: elsif mech.force_default_encoding then 117: @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding 118: else 119: @encodings.reverse_each do |encoding| 120: @parser = @mech.html_parser.parse html_body, nil, encoding 121: 122: break unless encoding_error? @parser 123: end 124: end 125: 126: @parser 127: end
# File lib/mechanize/page.rb, line 162
##
# Clears the cached parser and every memoized value derived from it so
# they are rebuilt on next access.  Returns nil.
#
# @images and @image_urls are cleared along with the other collections;
# they are memoized from search results in the same way and would
# otherwise keep referring to the discarded parser.
def reset
  @bases        = nil
  @forms        = nil
  @frames       = nil
  @iframes      = nil
  @images       = nil
  @image_urls   = nil
  @links        = nil
  @labels       = nil
  @labels_hash  = nil
  @meta_refresh = nil
  @parser       = nil
  @title        = nil
end
# File lib/mechanize/page.rb, line 66
##
# Instance-level shortcut: returns the result of the class method
# ::response_header_charset for this page's response.
def response_header_charset
  self.class.response_header_charset response
end
Search for paths in the page using Nokogiri’s #search. The paths can be XPath or CSS and an optional Hash of namespaces may be appended.
See Nokogiri::XML::Node#search for further details.
# File lib/mechanize/page.rb, line 200 200: def_delegator :parser, :search, :search
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.