fallbacks/lxp.lua
author Kim Alvefur <zash@zash.se>
Sun, 14 Apr 2024 14:06:57 +0200
changeset 13483 d1b7edf4e2de
parent 5776 bd0ff8ae98a8
permissions -rw-r--r--
net.unbound: Show canonical name in textual format (e.g. in shell) libunbound does not tell us the whole chain of CNAMEs, only the final canonical name. This is to aid in debugging since it will only be shown in the shell.


local coroutine = coroutine;
local tonumber = tonumber;
local string = string;
local setmetatable, getmetatable = setmetatable, getmetatable;
local pairs = pairs;

local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);

module("lxp")

local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
		if s:sub(1,1) == "#" then
			if s:sub(2,2) == "x" then
				return string.char(tonumber(s:sub(3), 16));
			else
				return string.char(tonumber(s:sub(2)));
			end
		end
	end
});
local function xml_unescape(str)
	return (str:gsub("&(.-);", entity_map));
end
local function parse_tag(s)
	local name,sattr=(s):gmatch("([^%s]+)(.*)")();
	local attr = {};
	for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do attr[a] = xml_unescape(b); end
	return name, attr;
end

local function parser(data, handlers, ns_separator)
	local function read_until(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos);
		data = data:sub(pos+1);
		return r;
	end
	local function read_before(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1,1);
	end

	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	local function apply_ns(name, dodefault)
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		for k,v in pairs(attr) do
			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
			if xmlns then
				ns[xmlns] = v;
				attr[k] = nil;
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() == "<" then
			local elem = read_until(">"):sub(2,-2);
			if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions
			elseif elem:sub(1,1) == "/" then -- end tag
				elem = elem:sub(2);
				local name = pop();
				handlers:EndElement(name); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				elem = elem:sub(1,-2);
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
				name = pop();
				handlers:EndElement(name);
			else -- start tag
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
			end
		else
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		end
	end
end

function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	return {
		parse = function(self, data)
			if not data then
				co = deadroutine;
				return true; -- eof
			end
			local success, result = coroutine.resume(co, data, handlers, ns_separator);
			if result then
				co = deadroutine;
				return nil, result; -- error
			end
			return true; -- success
		end;
	};
end

return _M;