Create a new document
/*
 * call-seq:
 *   new(uri = nil, external_id = nil)
 *
 * Create a new HTML document.
 *
 * All arguments are collected into an array; the first two entries are
 * read as +uri+ and +external_id+.  Each one is handed to htmlNewDoc()
 * when it is truthy, otherwise NULL is passed in its place.  The complete
 * original argument list is then forwarded to the new object's
 * +initialize+ through rb_obj_call_init().
 *
 * Raises ArgumentError (from StringValueCStr) when +uri+ or +external_id+
 * contains an embedded NUL byte, and RuntimeError when htmlNewDoc()
 * returns NULL.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE uri, external_id, rest, rb_doc;
  htmlDocPtr doc;

  /* "0*": no mandatory arguments, everything lands in +rest+. */
  rb_scan_args(argc, argv, "0*", &rest);
  uri         = rb_ary_entry(rest, (long)0);
  external_id = rb_ary_entry(rest, (long)1);

  /*
   * StringValueCStr rather than StringValuePtr: libxml2 reads these as
   * NUL-terminated C strings, so an embedded NUL must be rejected instead
   * of silently truncating the value.
   */
  doc = htmlNewDoc(
          RTEST(uri) ? (const xmlChar *)StringValueCStr(uri) : NULL,
          RTEST(external_id) ? (const xmlChar *)StringValueCStr(external_id) : NULL
        );

  /* Never hand a NULL document to the wrapper. */
  if (doc == NULL) {
    rb_raise(rb_eRuntimeError, "Could not create document");
  }

  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);

  return rb_doc;
}
Parse HTML. string_or_io may be a String, or any object that responds to read and close, such as an IO or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
# File lib/nokogiri/html/document.rb, line 83
###
# Parse HTML.
#
# +string_or_io+:: a String, or an object that responds to +read+
#                  (such objects are handed to +read_io+).
# +url+::          location of the document; when nil and the input
#                  responds to +path+, that path is used.
# +encoding+::     encoding to parse with; when nil it is taken from
#                  the input's own encoding (unless that is ASCII-8BIT)
#                  or detected through EncodingReader.
# +options+::      an Integer bit mask or an XML::ParseOptions-like
#                  object responding to +to_i+.  The options object is
#                  yielded to the block, if one is given, before parsing.
#
# Returns the parsed document, or a new empty document when the input
# is nil or empty.
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
  # Integer, not Fixnum: Fixnum was folded into Integer in Ruby 2.4 and
  # the constant no longer exists in Ruby 3.2+, where referencing it
  # raises NameError.  Integer matches on every Ruby version.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    if !encoding
      # Libxml2's parser has poor support for encoding
      # detection.  First, it does not recognize the HTML5
      # style meta charset declaration.  Secondly, even if it
      # successfully detects an encoding hint, it does not
      # re-decode or re-parse the preceding part which may be
      # garbled.
      #
      # EncodingReader aims to perform advanced encoding
      # detection beyond what Libxml2 does, and to emulate
      # rewinding of a stream and make Libxml2 redo parsing
      # from the start when an encoding hint is found.
      string_or_io = EncodingReader.new(string_or_io)
      begin
        return read_io(string_or_io, url, encoding, options.to_i)
      rescue EncodingFound => e
        # Fall through and parse again with the detected encoding.
        encoding = e.found_encoding
      end
    end
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? or string_or_io.empty?

  encoding ||= EncodingReader.detect_encoding(string_or_io)

  read_memory(string_or_io, url, encoding, options.to_i)
end
Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse
/*
 * call-seq:
 *   read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with the given +url+, +encoding+ and
 * +options+.  See Nokogiri::HTML.parse.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadIO().  Parser errors reported through libxml2's structured
 * error hook while reading are pushed onto an array that is stored in the
 * returned document's @errors instance variable.
 *
 * Raises:
 * - whatever +io+ returns from the id_encoding_found method, when it
 *   responds to it and the result is not nil;
 * - a wrapped syntax error when parsing produced no document and libxml2
 *   recorded a last error;
 * - RuntimeError ("Could not parse document") when parsing produced no
 *   document and no error was recorded;
 * - ArgumentError when +url+ or +encoding+ contains an embedded NUL byte.
 */
static VALUE read_io(VALUE klass, VALUE io, VALUE url, VALUE encoding, VALUE options)
{
  /*
   * Every conversion that can raise happens up front, before the libxml2
   * error handler is installed.  Otherwise an exception (e.g. from
   * NUM2INT) would leave the global handler pointing at error_list.
   *
   * StringValueCStr is used because libxml2 treats both values as
   * NUL-terminated C strings.
   */
  const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
  int c_options = (int)NUM2INT(options);
  VALUE error_list = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
          io_read_callback,
          io_close_callback,
          (void *)io,
          c_url,
          c_enc,
          c_options
        );

  xmlSetStructuredErrorFunc(NULL, NULL);

  /*
   * If EncodingFound has occurred in EncodingReader, make sure to do
   * a cleanup and propagate the error.
   */
  if (rb_respond_to(io, id_encoding_found)) {
    VALUE encoding_found = rb_funcall(io, id_encoding_found, 0);
    if (!NIL_P(encoding_found)) {
      xmlFreeDoc(doc);
      rb_exc_raise(encoding_found);
    }
  }

  if (doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();

    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    }
    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);

  return document;
}
Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse
/*
 * call-seq:
 *   read_memory(string, url, encoding, options)
 *
 * Read the HTML document contained in +string+ with the given +url+,
 * +encoding+ and +options+.  See Nokogiri::HTML.parse.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadMemory().  Parser errors reported through libxml2's structured
 * error hook while reading are pushed onto an array that is stored in the
 * returned document's @errors instance variable.
 *
 * Raises:
 * - a wrapped syntax error when parsing produced no document and libxml2
 *   recorded a last error;
 * - RuntimeError ("Could not parse document") when parsing produced no
 *   document and no error was recorded;
 * - ArgumentError when +url+ or +encoding+ contains an embedded NUL byte;
 * - RangeError when +string+ is longer than an int can express.
 */
static VALUE read_memory(VALUE klass, VALUE string, VALUE url, VALUE encoding, VALUE options)
{
  /*
   * Convert url, encoding and options first.  These conversions may raise
   * and may run Ruby code (implicit to_str / to_int), so they are done
   * before the error handler is installed and before the buffer pointer
   * and length are captured.
   *
   * StringValueCStr is used for url/encoding because libxml2 treats them
   * as NUL-terminated C strings.  The document buffer is passed with an
   * explicit length, so StringValuePtr is the right call for it.
   */
  const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
  int c_options = (int)NUM2INT(options);
  const char *c_buffer = StringValuePtr(string);
  long buffer_len = RSTRING_LEN(string);
  VALUE error_list;
  VALUE document;
  htmlDocPtr doc;

  /* The length is handed over as an int; refuse to truncate silently. */
  if (buffer_len != (long)(int)buffer_len) {
    rb_raise(rb_eRangeError, "document is too large to parse");
  }

  error_list = rb_ary_new();

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, (int)buffer_len, c_url, c_enc, c_options);

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    /* Nothing to free here: there is no document. */
    xmlErrorPtr error = xmlGetLastError();

    if (error) {
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
    }
    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);

  return document;
}
Create a Nokogiri::XML::DocumentFragment from tags
# File lib/nokogiri/html/document.rb, line 70
###
# Build a DocumentFragment for this document from +tags+ (nil by
# default), passing this document's root as the third constructor
# argument.
def fragment tags = nil
  context_node = self.root
  DocumentFragment.new(self, tags, context_node)
end
Get the meta tag encoding for this document. If there is no meta tag, then nil is returned.
# File lib/nokogiri/html/document.rb, line 7
###
# Return the charset named in the 'content' attribute of the node given
# by +meta_content_type+.  Yields a falsy value when there is no such
# node or its content carries no "charset=" part.
def meta_encoding
  meta = meta_content_type
  return meta unless meta

  found = /charset\s*=\s*([\w-]+)/.match(meta['content'])
  found && found[1]
end
Set the meta tag encoding for this document. If there is no meta content tag, the encoding is not set.
# File lib/nokogiri/html/document.rb, line 16
###
# Rewrite the 'content' attribute of the node given by
# +meta_content_type+ so that it declares +encoding+ as the charset.
# Does nothing when there is no such node.
def meta_encoding= encoding
  meta = meta_content_type
  return meta unless meta

  meta['content'] = "text/html; charset=%s" % encoding
end
Serialize Node using options. Save options can also be set using a block. See SaveOptions.
These two statements are equivalent:
node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
or
node.serialize(:encoding => 'UTF-8') do |config| config.format.as_xml end
# File lib/nokogiri/html/document.rb, line 63
###
# Serialize using +options+, then defer to the parent implementation.
# When +options+ has no truthy :save_with entry, it is filled in with
# XML::Node::SaveOptions::DEFAULT_HTML (the hash passed in is updated
# in place).  A given block is forwarded to the parent as well.
def serialize options = {}
  unless options[:save_with]
    options[:save_with] = XML::Node::SaveOptions::DEFAULT_HTML
  end
  super
end
Get the title string of this document. Return nil if there is no title tag.
# File lib/nokogiri/html/document.rb, line 33
###
# Return the inner text of the node found by at('title'), or nil when
# no such node is found.
def title
  title_node = at('title')
  title_node && title_node.inner_text
end
Set the title string of this document. If there is no head element, the title is not set.
# File lib/nokogiri/html/document.rb, line 40
###
# Set the title of this document to +text+.  An existing title node is
# reused; otherwise a new one is appended to the node found by
# at('head').  When neither exists, nothing is changed and nil is
# returned.
def title=(text)
  title_node = at('title')

  unless title_node
    head_node = at('head')
    return nil unless head_node

    title_node = Nokogiri::XML::Node.new('title', self)
    head_node << title_node
  end

  title_node.children = XML::Text.new(text, self)
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.