fallbacks/lxp.lua
author Jonas Schäfer <jonas@wielicki.name>
Mon, 10 Jan 2022 18:23:54 +0100
branch 0.11
changeset 12185 783056b4e448
parent 5776 bd0ff8ae98a8
permissions -rw-r--r--
util.xml: Do not allow doctypes, comments or processing instructions. Yes. This is as bad as it sounds. CVE pending. In Prosody itself, this only affects mod_websocket, which uses util.xml to parse the <open/> frame, thus allowing unauthenticated remote DoS using Billion Laughs. However, third-party modules using util.xml may also be affected by this. This commit installs handlers which disallow the use of doctype declarations and processing instructions without any escape hatch. By default, it also introduces such a handler for comments; however, there is a way to enable comments nonetheless. This is because util.xml is used to parse human-facing data, where comments are generally a desirable feature, and also because comments are generally harmless.


local coroutine = coroutine;
local tonumber = tonumber;
local string = string;
local setmetatable, getmetatable = setmetatable, getmetatable;
local pairs = pairs;

-- A coroutine whose (empty) body has already been run once, so it is finished.
-- new() swaps it in for a parser's own coroutine after end-of-input or an
-- error, which makes every later resume of that parser fail.
local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);

-- Lua 5.1-style module declaration: the table it creates is what the file
-- returns as _M, and the global function new() defined below ends up in it.
-- NOTE(review): module() without package.seeall hides the standard globals
-- from the rest of the file, which is presumably why the ones that are needed
-- are captured in locals above -- keep that in mind before using any other
-- global below this line.
module("lxp")

-- Encode the Unicode code point `cp` (0 .. 0x10FFFF) as a UTF-8 byte string.
-- Only arithmetic and string.char are used so that the helper needs nothing
-- beyond the names localised at the top of the file.
local function utf8_encode(cp)
	if cp < 0x80 then
		return string.char(cp);
	end
	-- Peel 6-bit continuation groups off the low end, one at a time.
	local c1 = cp % 0x40; cp = (cp - c1) / 0x40;
	if cp < 0x20 then
		return string.char(0xC0 + cp, 0x80 + c1);
	end
	local c2 = cp % 0x40; cp = (cp - c2) / 0x40;
	if cp < 0x10 then
		return string.char(0xE0 + cp, 0x80 + c2, 0x80 + c1);
	end
	local c3 = cp % 0x40; cp = (cp - c3) / 0x40;
	return string.char(0xF0 + cp, 0x80 + c3, 0x80 + c2, 0x80 + c1);
end

-- Lookup table from entity name (the text between "&" and ";") to its
-- replacement. The five predefined XML entities are stored directly; numeric
-- character references ("#65", "#x41") are resolved on demand by __index and
-- returned as UTF-8. Anything that cannot be resolved (unknown name, malformed
-- or out-of-range number) yields nil, which makes string.gsub keep the
-- original "&...;" text instead of raising an error.
local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
		-- Leading zeros are stripped by the pattern so that the length check
		-- below bounds the value before it is converted to a number.
		local digits, base = s:match("^#x0*(%x+)$"), 16;
		if not digits then
			digits, base = s:match("^#0*(%d+)$"), 10;
		end
		if not digits or #digits > 7 then
			return nil;
		end
		local cp = tonumber(digits, base);
		if cp and cp <= 0x10FFFF then
			return utf8_encode(cp);
		end
		return nil;
	end
});
-- Expand every "&name;" reference in `str` by looking `name` up in
-- entity_map. Only the resulting string is returned; the substitution count
-- that gsub also produces is dropped.
local function xml_unescape(str)
	local unescaped = str:gsub("&(.-);", entity_map);
	return unescaped;
end
-- Split the inside of a start tag (the text between "<" and ">", without a
-- trailing "/") into the element name and a table of its attributes.
--
-- @param s  tag contents, e.g. [[a href="x" title='y']]
-- @return   name  the first run of non-whitespace characters
-- @return   attr  table mapping attribute name to its unescaped value
--
-- An attribute value ends at the same kind of quote that opened it, so a
-- double-quoted value may contain apostrophes and vice versa. Whitespace
-- around "=" is accepted. If `s` holds no name at all, the lookup of
-- attributes fails with a runtime error.
local function parse_tag(s)
	local name, sattr = s:match("([^%s]+)(.*)");
	local attr = {};
	-- %2 is a back-reference to the opening quote captured just before it.
	for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
		attr[key] = xml_unescape(value);
	end
	return name, attr;
end

-- Incremental XML parsing loop, written to run as the body of a coroutine.
-- It never returns: whenever the buffered input runs out it calls
-- coroutine.yield() and expects the next chunk of text as the first value
-- passed to the following resume.
--
-- @param data          first chunk of XML text
-- @param handlers      table providing StartElement(name, attr),
--                      EndElement(name) and CharacterData(text); they are
--                      invoked with method-call syntax (handlers:Name(...))
-- @param ns_separator  string put between a namespace URI and the local name
--                      of an element or attribute; it is concatenated, so it
--                      must be a string whenever a namespace applies
--
-- Comments, processing instructions and other "<!...>" declarations are
-- skipped; comments and processing instructions are read up to their real
-- terminator ("-->" / "?>") so that a ">" inside them is harmless. The
-- content of a CDATA section is passed to CharacterData verbatim.
--
-- Known limitations: end-tag names are not compared with the open element,
-- a ">" inside an attribute value ends the tag early, a DOCTYPE with an
-- internal subset is cut at its first ">", and text is only delivered once
-- the following "<" has been seen.
local function parser(data, handlers, ns_separator)
	-- Remove and return everything up to and including the first occurrence
	-- of the plain string `str`, yielding for more input until it appears.
	local function read_until(str)
		local pos, last = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos, last = data:find(str, nil, true);
		end
		local r = data:sub(1, last);
		data = data:sub(last+1);
		return r;
	end
	-- Remove and return everything before the first occurrence of the plain
	-- string `str`, leaving `str` itself at the front of the buffer.
	local function read_before(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	-- Return the next character without consuming it, yielding while the
	-- buffer is empty.
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1,1);
	end
	-- `raw` is markup that was read up to some ">". Keep appending input,
	-- one ">" at a time, until it ends with `closer` and is at least
	-- `min_len` long (opener plus closer, so the two cannot overlap).
	local function finish_markup(raw, min_len, closer)
		while #raw < min_len or raw:sub(-#closer) ~= closer do
			raw = raw..read_until(">");
		end
		return raw;
	end

	-- Namespace scopes form a chain: each element gets a fresh table whose
	-- metatable is the enclosing scope, and every scope is its own __index,
	-- so prefix lookups fall through to outer elements. Slot [0] of a scope
	-- holds the expanded name of the element that opened it.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	-- Expand "prefix:local" using the current scope. Unprefixed names get
	-- the default namespace only when `dodefault` is set (used for element
	-- names, not for attributes).
	local function apply_ns(name, dodefault)
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	-- Open a scope for element `tag`: record its xmlns declarations (and
	-- drop them from `attr`), then return the expanded tag name and a new
	-- attribute table. That table lists the expanded attribute names in its
	-- array part (in pairs() order) and maps each name to its value.
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		for k,v in pairs(attr) do
			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
			if xmlns then
				ns[xmlns] = v;
				attr[k] = nil;
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	-- Close the innermost scope and return the name it was opened with.
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() == "<" then
			local raw = read_until(">");
			if raw:sub(1,4) == "<!--" then -- comment: skip up to "-->"
				finish_markup(raw, 7, "-->");
			elseif raw:sub(1,9) == "<![CDATA[" then -- CDATA: literal text
				raw = finish_markup(raw, 12, "]]>");
				local cdata = raw:sub(10, -4);
				if cdata ~= "" then
					handlers:CharacterData(cdata);
				end
			elseif raw:sub(1,2) == "<?" then -- processing instruction: skip up to "?>"
				finish_markup(raw, 4, "?>");
			elseif raw:sub(2,2) == "!" then -- other declarations (e.g. DOCTYPE) are neglected
			else
				local elem = raw:sub(2,-2);
				if elem:sub(1,1) == "/" then -- end tag
					local name = pop();
					handlers:EndElement(name); -- TODO check for start-end tag name match
				elseif elem:sub(-1,-1) == "/" then -- empty tag
					elem = elem:sub(1,-2);
					local name,attr = parse_tag(elem);
					name,attr = push(name,attr);
					handlers:StartElement(name,attr);
					name = pop();
					handlers:EndElement(name);
				else -- start tag
					local name,attr = parse_tag(elem);
					name,attr = push(name,attr);
					handlers:StartElement(name,attr);
				end
			end
		else
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		end
	end
end

-- Create a parser object backed by a fresh coroutine running parser().
--
-- @param handlers      callback table handed to parser() on every resume
-- @param ns_separator  namespace separator handed to parser() on every resume
-- @return a table with a single method, parse(self, data):
--         * data is nil/false: marks end of input, returns true;
--         * otherwise the chunk is fed to the coroutine; returns true when
--           the coroutine produced no second result, else nil plus that
--           result (the error message).
--         After end of input or an error the coroutine is replaced by the
--         finished `deadroutine`, so later chunks are reported as errors.
function new(handlers, ns_separator)
	local routine = coroutine.create(parser);
	local function feed(_, chunk)
		if chunk then
			local _, failure = coroutine.resume(routine, chunk, handlers, ns_separator);
			if not failure then
				return true; -- success
			end
			routine = deadroutine;
			return nil, failure; -- error
		end
		routine = deadroutine;
		return true; -- eof
	end
	return { parse = feed };
end

-- _M is the module table set up by the module("lxp") call near the top of the
-- file; it carries the new() constructor defined above.
return _M;