---
-- Internalize XML into LUA.
--
-- This module allows you to transform an XML string into a Txml lua table.
-- But first, what is Txml? Another tag based language? NO.
-- Im' not an XML guru, so take my personal opinions with the right doubts.
-- XML is a pretty flexible language, good for doing nothig, bad for doing all.
-- Since it is really generic it may allow the reuse of of some tools...
-- But what do you think?! Isn't grep a reused peace of code?
-- Anyway it seems that doing XML stuff is for real men.
-- So everithing is done in XML nowadays.
-- Why I don't like it much? Simple, there is no way to access it
-- with the language. You have to parse it and transform it in a
-- more handy format for your internal purpose, elaborate it and
-- then retransform you data structure in XML.
-- So this module is
-- for the step of internalizing an XML tree into a LUA table.
-- This means that you will be able to travferse the tree in the
-- same way you traverse a table.
--
-- Txml = { tag_name = "father",
-- name = "Mario",
-- { tag_name = "son",
-- {"son content"}
-- }
-- }
--
-- This table represents this xml-- <?xml version="1.0"?> -- <father name="Mario"> -- <son>son content</son> -- </father> ---- This modules is able to convert the XML into the Txml format in -- a quite smart way. -- For example this code is valid:
-- father = xml2table.xml2table(the xml string you have seen before) -- print(father.name) -- print(father.son._content) ---- and the result will be "Mario" and "son content". -- And now a more complex example with namespaces.
-- <?xml version="1.0"?> -- <D:multistatus xmlns:D="Dav:"> -- <D:response> -- <D:href>http://ref1</D:href> -- <D:propstat> -- <D:status>HTTP/1.1 200</D:status> -- </D:propstat> -- </D:response> -- <D:response> -- <D:href>http://ref2</D:href> -- <D:propstat> -- <D:status>HTTP/1.1 404</D:status> -- </D:propstat> -- </D:response> -- </D:multistatus> ---- we can convert this to Txml in tree ways: --
-- tml1 = xml2table.xml2table(xml2)
-- tml2 = xml2table.xml2table(xml2,{})
-- tml3 = xml2table.xml2table(xml2,{["Dav:"]="d"})
--
-- The information stored is the same, but only the third is the good one.
-- The first has "Dav:" aded to each tag name. Impossiblo to type it in LUA.
-- The second doesn't do anithing. The third one trnasforms each
-- shortcut to his mapped. Each shortcut to "Dav:" will be replaced with
-- a shortcut to "d". This is nice for two reasons. First it converts the ":"
-- to __ and so you can type it in LUA. Second you can forget what
-- the XML uses as a shortcut, it will be replaced with "d" in any case.
--
-- -- > print(tml1.tag_name) -- Dav::multistatus -- > print(tml1[1].tag_name) -- Dav::response -- -- > print(tml2.tag_name) -- D:multistatus -- > print(tml2[1].tag_name) -- D:response -- -- > print(tml3.tag_name) -- d__multistatus -- > print(tml3[1].tag_name) -- d__response -- > print(tml3.d__response.d__href._content) -- http://ref1 -- -- > xml2table.forach_son(tml3,"d__response", -- >> function(k) -- >> print(k.d__href._content) -- >> end) -- http://ref1 -- http://ref2 --local Private = {} --============================================================================-- -- This is part of FreePOPs (http://www.freepops.org) released under GNU/GPL --============================================================================-- Private.stack = {} function Private.stack.mt(t) return { __index = { push = function(_,e) table.insert(t,1,e) end, pop = function() table.remove(t,1) end, top = function() return t[1] end }, __newindex = function(t,k,v) error("Internal error") end } end Private.stack.new = function() local x = {} setmetatable(x,Private.stack.mt({})) return x end function Private.lpx_cb_factory(t) return { CharacterData = function(p,s) if t.curnode[1] == nil then t.curnode[1] = {s} end end, EndElement = function(p, elementName) local tmp = t.s:top() if tmp ~= nil then table.insert(tmp,t.curnode) t.curnode = tmp t.s:pop() else t.root = t.curnode t.curnode = nil t.s = nil end end, StartElement= function(p, elementName, attributes) if t.curnode ~= nil and t.curnode[1] ~= nil and t.curnode[1].tag_name == nil then table.remove(t.curnode,1) end t.s:push(t.curnode) t.curnode = {tag_name=elementName} setmetatable(t.curnode,Private.smart_metatable) table.foreach(attributes,function(k,v) if type(k) ~= "number" then t.curnode[k] = v end end) end, } end Private.smart_metatable = { __index = function(t,k) if k == "_content" then local son = t[1] if son ~= nil and son.tag_name == nil then return son[1] else return nil end else local i = table.foreachi(t,function(i,son) if son.tag_name == k then return i end end) if i ~= nil then return t[i] else return nil end end end } function Private.map_namespaces(t,m,abbr) if t.tag_name == nil then return true end abbr = abbr or {} m = m or {} -- find namespaces abbreviations table.foreach(t, function (k,v) if type(k) ~= "number" then local _,_,tok = string.find(k,"^xmlns:(%w+)") local _,_,c = string.find(k,"^(xmlns)") if c then if tok then abbr[tok] = m[v] else abbr["_"] = m[v] end end end end) local _,_,x,tok = string.find(t.tag_name,"^(%w+):(%w+)") local replace = abbr[x] or m[x] if replace ~= nil or abbr["_"] ~= nil then if replace then t.tag_name = replace .. "__" .. tok else t.tag_name = abbr["_"] .. "__" .. (tok or t.tag_name) end end table.foreachi(t,function(_,k) Private.map_namespaces(k,m,abbr) end) end --==========================================================================-- -- extern function --==========================================================================-- module("xml2table") --- -- Converts XML data in a table. -- If m is {} namespaces are not expanded -- If m is { ["namespace"] = "xxx" } than namespaces will be -- expanded according to m rules, but not listed namespaces will -- not be expanded -- If m is nil namespaces are expanded every time it is possible. -- @param s string the xml data. -- @param m table the map. -- @param force_encoding string To force the encoding of the XML, -- putting "UTF-8" solves some problems with strange encodings. -- @return table the resulting table or nil follwed by msg,line,col. function xml2table(s,m,force_encoding) local tab = {s=Private.stack.new()} local handle_namespaces = nil if m == nil then handle_namespaces = ":" end local p = lxp.new(Private.lpx_cb_factory(tab),handle_namespaces) if force_encoding then if type(p.setencoding) == "function" then p:setencoding(force_encoding) else s = string.gsub(s,'encoding="[%w%-]+"', 'encoding="'..force_encoding..'"') end end local ok, msg, line, col, pos = p:parse(s) if not ok then return nil,msg.." line="..line.." col="..col.." pos="..pos.. " '"..string.sub(s, math.max(pos-20,0), math.min(pos+20,string.len(s))).. "...'" end if m ~= nil then Private.map_namespaces(tab.root,m) end return tab.root end --- -- This is a selective table-foreach. -- A correct usage is forach_son(t,"D__resposnse",f). function forach_son(t,sonname,f) table.foreachi(t,function(_,v) if v.tag_name == sonname then f(v) end end) end