Class Index [+]

Quicksearch

Nokogiri::XML::Document

Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document is created by parsing an XML document. See Nokogiri::XML::Document.parse() for more information on parsing.

For searching a Document, see Nokogiri::XML::Node#css and Nokogiri::XML::Node#xpath

Constants

NCNAME_START_CHAR

I’m ignoring unicode characters here. See www.w3.org/TR/REC-xml-names/#ns-decl for more details.

NCNAME_CHAR
NCNAME_RE

Attributes

errors[RW]

A list of Nokogiri::XML::SyntaxError found when parsing a document

Public Class Methods

new(version = default) click to toggle source

Create a new document with version (defaults to “1.0“)

/*
 * Build a fresh, empty libxml2 document and wrap it in an instance of klass.
 *
 * The first optional argument is used as the XML version string; when it is
 * absent (or nil) the version is "1.0". All arguments are then forwarded
 * untouched to the Ruby-level initialize via rb_obj_call_init.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE extra_args;
  VALUE xml_version;
  VALUE wrapped;
  xmlDocPtr c_doc;

  /* "0*": no required arguments, everything is collected into an array. */
  rb_scan_args(argc, argv, "0*", &extra_args);

  xml_version = rb_ary_entry(extra_args, (long)0);
  if (NIL_P(xml_version)) {
    xml_version = rb_str_new2("1.0");
  }

  c_doc   = xmlNewDoc((xmlChar *)StringValuePtr(xml_version));
  wrapped = Nokogiri_wrap_xml_document(klass, c_doc);
  rb_obj_call_init(wrapped, argc, argv);

  return wrapped;
}
parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block) click to toggle source

Parse an XML file.

string_or_io may be a String, or any object that responds to read and close such as an IO, or StringIO.

url (optional) is the URI where this document is located.

encoding (optional) is the encoding that should be used when processing the document.

options (optional) is a configuration object that sets options during parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the Nokogiri::XML::ParseOptions for more information.

block (optional) is passed a configuration object on which parse options may be set.

When parsing untrusted documents, it’s recommended that the nonet option be used, as shown in this example code:

  Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }

Nokogiri.XML() is a convenience method which will call this method.

    # File lib/nokogiri/xml/document.rb, line 43
43:       def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
44:         options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
45:         # Give the options to the user
46:         yield options if block_given?
47: 
48:         doc = if string_or_io.respond_to?(:read)
49:           url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
50:           read_io(string_or_io, url, encoding, options.to_i)
51:         else
52:           # read_memory pukes on empty docs
53:           return new if string_or_io.nil? or string_or_io.empty?
54:           read_memory(string_or_io, url, encoding, options.to_i)
55:         end
56: 
57:         # do xinclude processing
58:         doc.do_xinclude(options) if options.xinclude?
59: 
60:         return doc
61:       end
read_io(io, url, encoding, options) click to toggle source

Create a new document from an IO object

/*
 * Parse a document by pulling data from a Ruby IO-like object.
 *
 * url and encoding may be nil, in which case NULL is handed to xmlReadIO.
 * options is converted with NUM2INT and passed through as the parser option
 * bits. Errors reported through the structured error handler while parsing
 * are pushed onto a Ruby array that ends up in the document's @errors.
 *
 * Raises the wrapped libxml2 error (or RuntimeError when libxml2 recorded
 * none) if no document could be produced.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * location = NULL;
  const char * charset  = NULL;
  VALUE parse_errors;
  VALUE wrapped;
  xmlDocPtr parsed;
  xmlErrorPtr last_error;

  if (!NIL_P(url))      location = StringValuePtr(url);
  if (!NIL_P(encoding)) charset  = StringValuePtr(encoding);
  parse_errors = rb_ary_new();

  /* Collect parser diagnostics into parse_errors for the duration of the read. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)parse_errors, Nokogiri_error_array_pusher);

  parsed = xmlReadIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)io,
      location,
      charset,
      (int)NUM2INT(options)
  );

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (parsed != NULL) {
    wrapped = Nokogiri_wrap_xml_document(klass, parsed);
    rb_iv_set(wrapped, "@errors", parse_errors);
    return wrapped;
  }

  /* Failure path: parsed is NULL here. */
  xmlFreeDoc(parsed);

  last_error = xmlGetLastError();
  if (last_error == NULL)
    rb_raise(rb_eRuntimeError, "Could not parse document");
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));

  /* Not reached: both raise calls above leave this function. */
  return Qnil;
}
read_memory(string, url, encoding, options) click to toggle source

Create a new document from a String

/*
 * Parse a document from the bytes of a Ruby String.
 *
 * url and encoding may be nil, in which case NULL is handed to
 * xmlReadMemory. options is converted with NUM2INT and passed through as the
 * parser option bits. Errors reported through the structured error handler
 * while parsing are pushed onto a Ruby array that ends up in the document's
 * @errors.
 *
 * Raises the wrapped libxml2 error (or RuntimeError when libxml2 recorded
 * none) if no document could be produced.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * bytes    = StringValuePtr(string);
  const char * location = NULL;
  const char * charset  = NULL;
  int byte_count;
  VALUE parse_errors;
  VALUE wrapped;
  xmlDocPtr parsed;
  xmlErrorPtr last_error;

  if (!NIL_P(url))      location = StringValuePtr(url);
  if (!NIL_P(encoding)) charset  = StringValuePtr(encoding);
  byte_count   = (int)RSTRING_LEN(string);
  parse_errors = rb_ary_new();

  /* Collect parser diagnostics into parse_errors for the duration of the read. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)parse_errors, Nokogiri_error_array_pusher);
  parsed = xmlReadMemory(bytes, byte_count, location, charset, (int)NUM2INT(options));
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (parsed != NULL) {
    wrapped = Nokogiri_wrap_xml_document(klass, parsed);
    rb_iv_set(wrapped, "@errors", parse_errors);
    return wrapped;
  }

  /* Failure path: parsed is NULL here. */
  xmlFreeDoc(parsed);

  last_error = xmlGetLastError();
  if (last_error == NULL)
    rb_raise(rb_eRuntimeError, "Could not parse document");
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));

  /* Not reached: both raise calls above leave this function. */
  return Qnil;
}
wrap(document) click to toggle source

JRuby only: wraps Java’s org.w3c.dom.Document and returns a Nokogiri::XML::Document.

     # File lib/nokogiri/xml/document.rb, line 244
244:       def self.wrap document
245:         raise "JRuby only method" unless Nokogiri.jruby?
246:         return wrapJavaDocument(document)
247:       end

Public Instance Methods

<<(node_or_tags) click to toggle source
Alias for: add_child
add_child(node_or_tags) click to toggle source
     # File lib/nokogiri/xml/document.rb, line 229
229:       def add_child node_or_tags
230:         raise "Document already has a root node" if root
231:         node_or_tags = coerce(node_or_tags)
232:         if node_or_tags.is_a?(XML::NodeSet)
233:           raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
234:           super(node_or_tags.first)
235:         else
236:           super
237:         end
238:       end
Also aliased as: <<
canonicalize(mode = XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false) click to toggle source

Canonicalize a document and return the results. Takes an optional block that takes two parameters: the obj and that node’s parent. The obj will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace. The block must return a non-nil, non-false value if the obj passed in should be included in the canonicalized document.

/*
 * Canonicalize this document (xmlC14NExecute) and return the result as a
 * Ruby String.
 *
 * Optional arguments, in order:
 *   mode          - Integer passed to xmlC14NExecute as the mode; 0 when nil.
 *   incl_ns       - Array of Strings handed over as the NULL-terminated
 *                   inclusive-namespace prefix list; NULL when nil.
 *                   Raises TypeError if it is neither nil nor an Array.
 *   with_comments - truthy to keep comments; nil or false drops them.
 *
 * When a block is given it is passed to libxml2 (through block_caller) as
 * the visibility callback.
 *
 * Output is written into a StringIO through an xmlOutputBuffer whose
 * write/close callbacks are io_write_callback / io_close_callback.
 */
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
{
  VALUE mode;
  VALUE incl_ns;
  VALUE with_comments;
  xmlChar **ns = NULL;
  long ns_len, i;
  int c_mode;

  xmlDocPtr doc;
  xmlOutputBufferPtr buf;
  xmlC14NIsVisibleCallback cb = NULL;
  void * ctx = NULL;

  VALUE rb_cStringIO;
  VALUE io;

  rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);

  Data_Get_Struct(self, xmlDoc, doc);

  /* Do every conversion that may raise BEFORE allocating any C memory, so
   * that a Ruby exception cannot leak the buffer or the namespace list. */
  c_mode = NIL_P(mode) ? 0 : (int)NUM2INT(mode);
  if(!NIL_P(incl_ns)) Check_Type(incl_ns, T_ARRAY);

  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
  io           = rb_class_new_instance(0, 0, rb_cStringIO);

  if(rb_block_given_p()) {
    cb = block_caller;
    ctx = (void *)rb_block_proc();
  }

  if(!NIL_P(incl_ns)) {
    ns_len = RARRAY_LEN(incl_ns);
    /* +1 keeps the list NULL-terminated (calloc zero-fills). */
    ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
    if(ns == NULL)
      rb_raise(rb_eNoMemError, "Could not allocate namespace list");

    /* NOTE(review): StringValuePtr raises for entries that are not
     * string-like; in that case ns is still leaked. Fixing that fully
     * would need rb_ensure. */
    for (i = 0 ; i < ns_len ; i++) {
      VALUE entry = rb_ary_entry(incl_ns, i);
      const char * ptr = StringValuePtr(entry);
      ns[i] = (xmlChar*) ptr;
    }
  }

  buf = xmlAllocOutputBuffer(NULL);
  if(buf == NULL) {
    free(ns);
    rb_raise(rb_eRuntimeError, "Could not allocate output buffer");
  }

  buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
  buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
  buf->context       = (void *)io;

  /* RTEST rather than !NIL_P: an explicit `false` must disable comments. */
  xmlC14NExecute(doc, cb, ctx,
    c_mode,
    ns,
    (int)(RTEST(with_comments) ? 1 : 0),
    buf);

  /* The prefix array is ours (calloc above); the strings it points at are
   * owned by Ruby. free(NULL) is a no-op when no list was given. */
  free(ns);

  xmlOutputBufferClose(buf);

  return rb_funcall(io, rb_intern("string"), 0);
}
clone(...) click to toggle source
Alias for: dup
collect_namespaces() click to toggle source

Recursively get all namespaces from this node and its subtree and return them as a hash.

For example, given this document:

  <root xmlns:foo="bar">
    <bar xmlns:hello="world" />
  </root>

This method will return:

  { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }

WARNING: this method will clobber duplicate names in the keys. For example, given this document:

  <root xmlns:foo="bar">
    <bar xmlns:foo="baz" />
  </root>

The hash returned will look like this: { 'xmlns:foo' => 'bar' }

Non-prefixed default namespaces (as in “xmlns=”) are not included in the hash.

Note this is a very expensive operation in current implementation, as it traverses the entire graph, and also has to bring each node across the libxml bridge into a ruby object.

     # File lib/nokogiri/xml/document.rb, line 155
155:       def collect_namespaces
156:         ns = {}
157:         traverse { |j| ns.merge!(j.namespaces) }
158:         ns
159:       end
create_cdata(string, &block) click to toggle source

Create a CDATA Node containing string

     # File lib/nokogiri/xml/document.rb, line 107
107:       def create_cdata string, &block
108:         Nokogiri::XML::CDATA.new self, string.to_s, &block
109:       end
create_comment(string, &block) click to toggle source

Create a Comment Node containing string

     # File lib/nokogiri/xml/document.rb, line 112
112:       def create_comment string, &block
113:         Nokogiri::XML::Comment.new self, string.to_s, &block
114:       end
create_element(name, *args, &block) click to toggle source

Create an element with name, and optionally setting the content and attributes.

  doc.create_element "div" # <div></div>
  doc.create_element "div", :class => "container" # <div class='container'></div>
  doc.create_element "div", "contents" # <div>contents</div>
  doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
  doc.create_element("div") { |node| node['class'] = "container" } # <div class='container'></div>
    # File lib/nokogiri/xml/document.rb, line 80
80:       def create_element name, *args, &block
81:         elm = Nokogiri::XML::Element.new(name, self, &block)
82:         args.each do |arg|
83:           case arg
84:           when Hash
85:             arg.each { |k,v|
86:               key = k.to_s
87:               if key =~ NCNAME_RE
88:                 ns_name = key.split(":", 2)[1]
89:                 elm.add_namespace_definition ns_name, v
90:                 next
91:               end
92:               elm[k.to_s] = v.to_s
93:             }
94:           else
95:             elm.content = arg
96:           end
97:         end
98:         elm
99:       end
create_entity(name, type, external_id, system_id, content) click to toggle source

Create a new entity named name.

type is an integer representing the type of entity to be created, and it defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See the constants on Nokogiri::XML::EntityDecl for more information.

external_id, system_id, and content set the External ID, System ID, and content respectively. All of these parameters are optional.

/*
 * Declare a new entity on this document through xmlAddDocEntity and return
 * it wrapped as a cNokogiriXmlEntityDecl node.
 *
 * Arguments: name (required), then optionally type, external_id, system_id
 * and content. A nil type becomes XML_INTERNAL_GENERAL_ENTITY; any nil
 * string argument is passed to libxml2 as NULL.
 *
 * Raises the wrapped libxml2 error (or RuntimeError when libxml2 recorded
 * none) if the entity could not be added.
 */
static VALUE create_entity(int argc, VALUE *argv, VALUE self)
{
  VALUE name;
  VALUE type;
  VALUE external_id;
  VALUE system_id;
  VALUE content;

  xmlDocPtr doc ;
  xmlEntityPtr entity;
  xmlErrorPtr last_error;

  xmlChar * c_name        = NULL;
  xmlChar * c_external_id = NULL;
  xmlChar * c_system_id   = NULL;
  xmlChar * c_content     = NULL;
  int c_type              = XML_INTERNAL_GENERAL_ENTITY;

  Data_Get_Struct(self, xmlDoc, doc);

  /* "14": one required argument followed by up to four optional ones. */
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
      &content);

  xmlResetLastError();

  if (!NIL_P(name))        c_name        = (xmlChar *)StringValuePtr(name);
  if (!NIL_P(type))        c_type        = (int)NUM2INT(type);
  if (!NIL_P(external_id)) c_external_id = (xmlChar *)StringValuePtr(external_id);
  if (!NIL_P(system_id))   c_system_id   = (xmlChar *)StringValuePtr(system_id);
  if (!NIL_P(content))     c_content     = (xmlChar *)StringValuePtr(content);

  entity = xmlAddDocEntity(doc, c_name, c_type, c_external_id, c_system_id,
      c_content);

  if (entity != NULL) {
    return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)entity);
  }

  last_error = xmlGetLastError();
  if (last_error == NULL)
    rb_raise(rb_eRuntimeError, "Could not create entity");
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));

  /* Not reached: both raise calls above leave this function. */
  return Qnil;
}
create_text_node(string, &block) click to toggle source

Create a Text Node with string

     # File lib/nokogiri/xml/document.rb, line 102
102:       def create_text_node string, &block
103:         Nokogiri::XML::Text.new string.to_s, self, &block
104:       end
decorate(node) click to toggle source

Apply any decorators to node

     # File lib/nokogiri/xml/document.rb, line 202
202:       def decorate node
203:         return unless @decorators
204:         @decorators.each { |klass,list|
205:           next unless node.is_a?(klass)
206:           list.each { |moodule| node.extend(moodule) }
207:         }
208:       end
decorators(key) click to toggle source

Get the list of decorators given key

     # File lib/nokogiri/xml/document.rb, line 162
162:       def decorators key
163:         @decorators ||= Hash.new
164:         @decorators[key] ||= []
165:       end
document() click to toggle source

A reference to self

     # File lib/nokogiri/xml/document.rb, line 122
122:       def document
123:         self
124:       end
dup click to toggle source

Copy this Document. An optional depth may be passed in, but it defaults to a deep copy. 0 is a shallow copy, 1 is a deep copy.

/*
 * Copy this document with xmlCopyDoc and wrap the copy in the same Ruby
 * class as the receiver.
 *
 * The single optional argument is the copy level handed to xmlCopyDoc; it
 * defaults to 1 when no argument is supplied. Returns nil if libxml2 could
 * not produce a copy.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  VALUE level;
  xmlDocPtr source;
  xmlDocPtr copy;
  int supplied;

  supplied = rb_scan_args(argc, argv, "01", &level);
  if (supplied == 0) {
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, source);

  copy = xmlCopyDoc(source, (int)NUM2INT(level));
  if (copy == NULL) {
    return Qnil;
  }

  /* Carry the node type of the source document over to the copy. */
  copy->type = source->type;

  return Nokogiri_wrap_xml_document(rb_obj_class(self), copy);
}
Also aliased as: clone
encoding click to toggle source

Get the encoding for this Document

/*
 * Return the document's encoding as a Ruby String, or nil when the
 * underlying xmlDoc has no encoding set.
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (c_doc->encoding) {
    return NOKOGIRI_STR_NEW2(c_doc->encoding);
  }

  return Qnil;
}
encoding= encoding click to toggle source

Set the encoding string for this Document

/*
 * Replace the document's encoding string with a copy of the given Ruby
 * String and return the argument.
 *
 * Raises TypeError (from StringValuePtr) when encoding is not string-like,
 * and NoMemoryError when the copy cannot be allocated; in both cases the
 * document's current encoding is left untouched.
 */
static VALUE set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  const char * c_encoding;
  xmlChar * replacement;

  Data_Get_Struct(self, xmlDoc, doc);

  /* Convert and duplicate BEFORE releasing the old value: StringValuePtr
   * can raise, and freeing first would leave doc->encoding dangling. */
  c_encoding  = StringValuePtr(encoding);
  replacement = xmlStrdup((const xmlChar *)c_encoding);
  if (replacement == NULL)
    rb_raise(rb_eNoMemError, "Could not copy encoding string");

  /* The old string belongs to libxml2's allocator, so it must be released
   * with xmlFree rather than libc free(). The cast drops const. */
  if (doc->encoding)
    xmlFree((void *)doc->encoding);

  doc->encoding = replacement;

  return encoding;
}
fragment(tags = nil) click to toggle source

Create a Nokogiri::XML::DocumentFragment from tags. Returns an empty fragment if tags is nil.

     # File lib/nokogiri/xml/document.rb, line 221
221:       def fragment tags = nil
222:         DocumentFragment.new(self, tags, self.root)
223:       end
name() click to toggle source

The name of this document. Always returns “document“

     # File lib/nokogiri/xml/document.rb, line 117
117:       def name
118:         'document'
119:       end
namespaces() click to toggle source

Get the hash of namespaces on the root Nokogiri::XML::Node

     # File lib/nokogiri/xml/document.rb, line 214
214:       def namespaces
215:         root ? root.namespaces : {}
216:       end
remove_namespaces! click to toggle source

Remove all namespaces from all nodes in the document.

This could be useful for developers who either don’t understand namespaces or don’t care about them.

The following example shows a use case, and you can decide for yourself whether this is a good thing or not:

  doc = Nokogiri::XML <<-EOXML
     <root>
       <car xmlns:part="http://general-motors.com/">
         <part:tire>Michelin Model XGV</part:tire>
       </car>
       <bicycle xmlns:part="http://schwinn.com/">
         <part:tire>I'm a bicycle tire!</part:tire>
       </bicycle>
     </root>
     EOXML
  
  doc.xpath("//tire").to_s # => ""
  doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
  doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
  
  doc.remove_namespaces!
  
  doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
  doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
  doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""

For more information on why this probably is not a good thing in general, please direct your browser to tenderlovemaking.com/2009/04/23/namespaces-in-xml/

/*
 * Strip namespaces from the whole document by handing the document node to
 * recursively_remove_namespaces_from_node. Returns self.
 */
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  /* The document itself is the starting node for the traversal. */
  recursively_remove_namespaces_from_node((xmlNodePtr)c_doc);

  return self;
}
root click to toggle source

Get the root node for this document.

/*
 * Return the document's root element wrapped as a Ruby node, or nil when
 * xmlDocGetRootElement finds none.
 */
static VALUE root(VALUE self)
{
  xmlDocPtr c_doc;
  xmlNodePtr c_root;

  Data_Get_Struct(self, xmlDoc, c_doc);

  c_root = xmlDocGetRootElement(c_doc);
  if (c_root == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, c_root) ;
}
root= click to toggle source

Set the root element on this document

/*
 * Set (or clear) the root element of this document and return the argument.
 *
 *   root == nil : the current root element, if any, is unlinked from the
 *                 document.
 *   otherwise   : root becomes the new root element. A node that belongs to
 *                 another document is first deep-copied into this one with
 *                 xmlDocCopyNode; RuntimeError is raised if that copy fails.
 *
 * Every node displaced from the root position is handed to
 * nokogiri_root_node (helper not visible here - presumably it keeps the
 * detached node tracked by its document; confirm against its definition).
 */
static VALUE set_root(VALUE self, VALUE root)
{
  xmlDocPtr doc;
  xmlNodePtr new_root;
  xmlNodePtr old_root;

  Data_Get_Struct(self, xmlDoc, doc);

  if(NIL_P(root)) {
    old_root = xmlDocGetRootElement(doc);

    if(old_root) {
      xmlUnlinkNode(old_root);
      nokogiri_root_node(old_root);
    }

    return root;
  }

  Data_Get_Struct(root, xmlNode, new_root);

  /* If the new root's document is not the same as the current document,
   * then we need to dup the node in to this document. */
  if(new_root->doc != doc) {
    if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  /* xmlDocSetRootElement returns the element it actually displaced (NULL if
   * none). Using that value covers the same-document case too, which used
   * to drop the displaced root without registering it, and it is never the
   * new root itself when a node is re-set as its own document's root. */
  old_root = xmlDocSetRootElement(doc, new_root);
  if(old_root) nokogiri_root_node(old_root);

  return root;
}
slop!() click to toggle source

Explore a document with shortcut methods. See Nokogiri::Slop for details.

Note that any nodes that have been instantiated before #slop! is called will not be decorated with sloppy behavior. So, if you’re in irb, the preferred idiom is:

  irb> doc = Nokogiri::Slop my_markup

and not

  irb> doc = Nokogiri::HTML my_markup
  ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
  irb> doc.slop!
  ... which does absolutely nothing.
     # File lib/nokogiri/xml/document.rb, line 191
191:       def slop!
192:         unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
193:           decorators(XML::Node) << Nokogiri::Decorators::Slop
194:           decorate!
195:         end
196: 
197:         self
198:       end
to_java() click to toggle source

JRuby only: returns Java’s org.w3c.dom.Document for this Document.

     # File lib/nokogiri/xml/document.rb, line 252
252:       def to_java
253:         raise "JRuby only method" unless Nokogiri.jruby?
254:         return toJavaDocument()
255:       end
url click to toggle source

Get the url name for this document.

/*
 * Return the document's URL as a Ruby String, or nil when the underlying
 * xmlDoc has no URL set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (!c_doc->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(c_doc->URL);
}
validate() click to toggle source

Validate this Document against its DTD. Returns a list of errors on the document or nil when there is no DTD.

     # File lib/nokogiri/xml/document.rb, line 170
170:       def validate
171:         return nil unless internal_subset
172:         internal_subset.validate self
173:       end
version click to toggle source

Get the XML version for this Document

/*
 * Return the document's XML version as a Ruby String, or nil when the
 * underlying xmlDoc has no version set.
 */
static VALUE version(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (c_doc->version) {
    return NOKOGIRI_STR_NEW2(c_doc->version);
  }

  return Qnil;
}

Private Instance Methods

implied_xpath_context() click to toggle source
     # File lib/nokogiri/xml/document.rb, line 258
258:       def implied_xpath_context
259:         "/"
260:       end
inspect_attributes() click to toggle source
     # File lib/nokogiri/xml/document.rb, line 262
262:       def inspect_attributes
263:         [:name, :children]
264:       end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.