Convert a Nokogiri document to a Ruby Hash (tagged: ruby)

Convert a Nokogiri document to a Ruby Hash


If you want to convert a Nokogiri XML document to a hash, just do the following:

# Hash.from_xml is an ActiveSupport core extension, so it has to be loaded
# explicitly when not running inside Rails.
require 'active_support/core_ext/hash/conversions'

# Serialize the Nokogiri document back to an XML string, then let
# ActiveSupport parse that string into a nested Hash.
xml_string = nokogiri_document.to_s
hash = Hash.from_xml(xml_string)


Here's a far simpler version that creates a robust Hash that includes namespace information, both for elements and attributes:

require 'nokogiri'

class Nokogiri::XML::Node
  # Names for the integer codes returned by #node_type.
  # Node types that are not listed here produce a nil :kind.
  TYPENAMES = {
    1 => 'element',
    2 => 'attribute',
    3 => 'text',
    4 => 'cdata',
    8 => 'comment'
  }.freeze

  # Recursively convert this node into a plain Hash.
  #
  # Keys always present:
  #   :kind - entry from TYPENAMES for this node's type (nil if unlisted)
  #   :name - the node name
  #   :text - the node's text content
  # Keys present only when the node has a namespace:
  #   :nshref, :nsprefix
  # Keys present only for element nodes:
  #   :attr - Array of hashes, one per attribute node
  #   :kids - Array of hashes, one per child node
  #
  # NOTE(review): `to_hash` is also the method Ruby uses for implicit Hash
  # conversion, so defining it on every node may cause nodes to be treated
  # as hashes in some call sites. The name is kept for compatibility.
  #
  # @return [Hash]
  def to_hash
    { kind: TYPENAMES[node_type], name: name }.tap do |h|
      h.merge!(nshref: namespace.href, nsprefix: namespace.prefix) if namespace
      h.merge!(text: text)
      h.merge!(attr: attribute_nodes.map(&:to_hash)) if element?
      h.merge!(kids: children.map(&:to_hash)) if element?
    end
  end
end

class Nokogiri::XML::Document
  # Convert the document by converting its root element.
  #
  # @return [Hash, nil] the root element's hash, or nil when the document
  #   has no root element (for example, when parsed from an empty string)
  def to_hash
    root && root.to_hash
  end
end

Seen in action:

# Parse a small namespaced document and dump its Hash representation.
xml = '<r a="b" xmlns:z="foo"><z:a>Hello <b z:m="n" x="y">World</b>!</z:a></r>'
doc = Nokogiri::XML(xml)
p(doc.to_hash)
# The output below is laid out across several lines for readability.
#=> {
#=>   :kind=>"element",
#=>   :name=>"r",
#=>   :text=>"Hello World!",
#=>   :attr=>[
#=>     {
#=>       :kind=>"attribute",
#=>       :name=>"a",
#=>       :text=>"b"
#=>     }
#=>   ],
#=>   :kids=>[
#=>     {
#=>       :kind=>"element",
#=>       :name=>"a",
#=>       :nshref=>"foo",
#=>       :nsprefix=>"z",
#=>       :text=>"Hello World!",
#=>       :attr=>[],
#=>       :kids=>[
#=>         {
#=>           :kind=>"text",
#=>           :name=>"text",
#=>           :text=>"Hello "
#=>         },
#=>         {
#=>           :kind=>"element",
#=>           :name=>"b",
#=>           :text=>"World",
#=>           :attr=>[
#=>             {
#=>               :kind=>"attribute",
#=>               :name=>"m",
#=>               :nshref=>"foo",
#=>               :nsprefix=>"z",
#=>               :text=>"n"
#=>             },
#=>             {
#=>               :kind=>"attribute",
#=>               :name=>"x",
#=>               :text=>"y"
#=>             }
#=>           ],
#=>           :kids=>[
#=>             {
#=>               :kind=>"text",
#=>               :name=>"text",
#=>               :text=>"World"
#=>             }
#=>           ]
#=>         },
#=>         {
#=>           :kind=>"text",
#=>           :name=>"text",
#=>           :text=>"!"
#=>         }
#=>       ]
#=>     }
#=>   ]
#=> }


I use this code with libxml-ruby (1.1.3). I have not used Nokogiri myself, but I understand that it is built on the same underlying libxml2 library. I would also encourage you to look at ROXML (http://github.com/Empact/roxml/tree), which maps XML elements to Ruby objects; it is built atop libxml.

# USAGE: Hash.from_libxml(YOUR_XML_STRING)
require 'xml/libxml'

# adapted from
# http://movesonrails.com/articles/2008/02/25/libxml-for-active-resource-2-0
class Hash
  class << self
    # Parse an XML string with libxml-ruby and convert it to a nested Hash.
    #
    # Note that this assigns XML.default_load_external_dtd and
    # XML.default_pedantic_parser on every call.
    #
    # @param xml [String] the XML source to parse
    # @param strict [Boolean] value assigned to XML.default_pedantic_parser
    # @return [Hash, nil] a one-entry Hash mapping the root element's name
    #   (a String) to the converted root node, or nil if parsing or
    #   conversion raised an error
    def from_libxml(xml, strict = true)
      XML.default_load_external_dtd = false
      XML.default_pedantic_parser = strict
      document = XML::Parser.string(xml).parse
      { document.root.name.to_s => xml_node_to_hash(document.root) }
    rescue StandardError
      # raise your custom exception here
      # Only StandardError is rescued so that Interrupt, SystemExit and
      # similar are not swallowed; failures still yield nil as before.
      nil
    end

    # Recursively convert a libxml node into plain Ruby values.
    #
    # * a non-element node becomes its content as a String
    # * an element without children becomes nil
    # * an element whose only child is a text node becomes that text
    # * any other element becomes a Hash keyed by child name (Symbol);
    #   children sharing a name are collected into an Array in document
    #   order
    #
    # Text nodes that have siblings are skipped.
    #
    # @param node [#element?, #children?, #each_child, #content] libxml node
    # @return [Hash, String, nil]
    def xml_node_to_hash(node)
      return node.content.to_s unless node.element?
      return nil unless node.children?

      result_hash = {}
      node.each_child do |child|
        result = xml_node_to_hash(child)

        if child.name == 'text'
          # A lone text child means this element simply wraps a string.
          return result if !child.next? && !child.prev?
          next
        end

        key = child.name.to_sym
        # key? rather than truthiness: an earlier empty sibling is stored
        # as nil and must still be kept when the name repeats.
        if !result_hash.key?(key)
          result_hash[key] = result
        elsif result_hash[key].is_a?(Array)
          result_hash[key] << result
        else
          result_hash[key] = [result_hash[key], result]
        end
      end
      result_hash
    end
  end
end