Convert a Nokogiri document to a Ruby Hash
If you want to convert a Nokogiri XML document to a hash, just do the following:
# Hash.from_xml comes from ActiveSupport (a third-party gem), not from Nokogiri.
require 'active_support/core_ext/hash/conversions'

# Serialize the Nokogiri document back to an XML string, then let
# ActiveSupport parse that string into a Hash.
hash = Hash.from_xml(nokogiri_document.to_s)
Here's a far simpler version that creates a robust Hash that includes namespace information, both for elements and attributes:
require 'nokogiri'

# Reopens Nokogiri's node class to add a recursive #to_hash that keeps
# namespace information for both elements and attributes.
class Nokogiri::XML::Node
  # Readable names for the numeric values returned by #node_type.
  # Node types not listed here produce a nil :kind.
  TYPENAMES = {
    1 => 'element',
    2 => 'attribute',
    3 => 'text',
    4 => 'cdata',
    8 => 'comment'
  }.freeze

  # Converts this node (and, for elements, its attributes and children)
  # into a nested Hash.
  #
  # Keys, in insertion order:
  #   :kind     - entry from TYPENAMES for this node's type
  #   :name     - the node name
  #   :nshref   - namespace URI   (only when the node has a namespace)
  #   :nsprefix - namespace prefix (only when the node has a namespace)
  #   :text     - the node's text content
  #   :attr     - Array of attribute hashes (elements only)
  #   :kids     - Array of child-node hashes (elements only)
  #
  # @return [Hash]
  def to_hash
    { kind: TYPENAMES[node_type], name: name }.tap do |h|
      h.merge! nshref: namespace.href, nsprefix: namespace.prefix if namespace
      h.merge! text: text
      h.merge! attr: attribute_nodes.map(&:to_hash) if element?
      h.merge! kids: children.map(&:to_hash) if element?
    end
  end
end

class Nokogiri::XML::Document
  # Converts the document by converting its root element.
  #
  # @return [Hash, nil] the root element's hash, or nil when the document
  #   has no root element (e.g. it was parsed from an empty string)
  def to_hash
    root && root.to_hash
  end
end
Seen in action:
# Example: element "r" declares the "z" namespace; "z:a" and the "z:m"
# attribute use it, so their hashes carry :nshref and :nsprefix.
xml = '<r a="b" xmlns:z="foo"><z:a>Hello <b z:m="n" x="y">World</b>!</z:a></r>'
doc = Nokogiri::XML(xml)
p doc.to_hash
#=> {
#=>   :kind=>"element",
#=>   :name=>"r",
#=>   :text=>"Hello World!",
#=>   :attr=>[
#=>     {
#=>       :kind=>"attribute",
#=>       :name=>"a",
#=>       :text=>"b"
#=>     }
#=>   ],
#=>   :kids=>[
#=>     {
#=>       :kind=>"element",
#=>       :name=>"a",
#=>       :nshref=>"foo",
#=>       :nsprefix=>"z",
#=>       :text=>"Hello World!",
#=>       :attr=>[],
#=>       :kids=>[
#=>         {
#=>           :kind=>"text",
#=>           :name=>"text",
#=>           :text=>"Hello "
#=>         },
#=>         {
#=>           :kind=>"element",
#=>           :name=>"b",
#=>           :text=>"World",
#=>           :attr=>[
#=>             {
#=>               :kind=>"attribute",
#=>               :name=>"m",
#=>               :nshref=>"foo",
#=>               :nsprefix=>"z",
#=>               :text=>"n"
#=>             },
#=>             {
#=>               :kind=>"attribute",
#=>               :name=>"x",
#=>               :text=>"y"
#=>             }
#=>           ],
#=>           :kids=>[
#=>             {
#=>               :kind=>"text",
#=>               :name=>"text",
#=>               :text=>"World"
#=>             }
#=>           ]
#=>         },
#=>         {
#=>           :kind=>"text",
#=>           :name=>"text",
#=>           :text=>"!"
#=>         }
#=>       ]
#=>     }
#=>   ]
#=> }
I use this code with libxml-ruby (1.1.3). I have not used Nokogiri myself, but I understand that it is built on the same underlying libxml2 library. I would also encourage you to look at ROXML (http://github.com/Empact/roxml/tree), which maps XML elements to Ruby objects; it is built atop libxml.
# USAGE: Hash.from_libxml(YOUR_XML_STRING)
require 'xml/libxml'

# adapted from
# http://movesonrails.com/articles/2008/02/25/libxml-for-active-resource-2-0
class Hash
  class << self
    # Parses an XML string with libxml-ruby and converts it to a Hash keyed
    # by the root element's name.
    #
    # Note: this sets the global XML.default_load_external_dtd and
    # XML.default_pedantic_parser options as a side effect.
    #
    # @param xml [String] the XML source text
    # @param strict [Boolean] value assigned to XML.default_pedantic_parser
    # @return [Hash, nil] { root_name => converted_root }, or nil when
    #   parsing or conversion raises a StandardError
    def from_libxml(xml, strict = true)
      XML.default_load_external_dtd = false
      XML.default_pedantic_parser = strict
      result = XML::Parser.string(xml).parse
      { result.root.name.to_s => xml_node_to_hash(result.root) }
    rescue StandardError
      # Only StandardError is rescued so that signals and SystemExit still
      # propagate. Raise your custom exception here if nil is not wanted.
      nil
    end

    # Recursively converts a libxml node into plain Ruby data.
    #
    # * a non-element node becomes its content as a String
    # * an element without children becomes nil
    # * an element whose only child is a text node becomes that text
    # * any other element becomes a Hash of child name (Symbol) => value;
    #   children sharing a name are collected into an Array in document
    #   order. Text nodes that have siblings are skipped.
    #
    # @param node [Object] a libxml node responding to element?, children?,
    #   each_child, name, next?, prev? and content
    # @return [Hash, String, nil]
    def xml_node_to_hash(node)
      return node.content.to_s unless node.element?
      return nil unless node.children?

      result_hash = {}
      node.each_child do |child|
        result = xml_node_to_hash(child)

        if child.name == 'text'
          # A lone text child is the element's whole value.
          return result if !child.next? && !child.prev?

          next
        end

        key = child.name.to_sym
        # key? rather than a truthiness test: an earlier empty sibling is
        # stored as nil and must still count as "already seen".
        if result_hash.key?(key)
          existing = result_hash[key]
          if existing.is_a?(Array)
            existing << result
          else
            result_hash[key] = [existing, result]
          end
        else
          result_hash[key] = result
        end
      end
      result_hash
    end
  end
end