tools/erlparse.lua
author Jonas Schäfer <jonas@wielicki.name>
Mon, 10 Jan 2022 18:23:54 +0100
branch0.11
changeset 12185 783056b4e448
parent 7822 ad709ee7d3d8
permissions -rw-r--r--
util.xml: Do not allow doctypes, comments or processing instructions Yes. This is as bad as it sounds. CVE pending. In Prosody itself, this only affects mod_websocket, which uses util.xml to parse the <open/> frame, thus allowing unauthenticated remote DoS using Billion Laughs. However, third-party modules using util.xml may also be affected by this. This commit installs handlers which disallow the use of doctype declarations and processing instructions without any escape hatch. It, by default, also introduces such a handler for comments, however, there is a way to enable comments nontheless. This is because util.xml is used to parse human-facing data, where comments are generally a desirable feature, and also because comments are generally harmless.

-- Prosody IM
-- Copyright (C) 2008-2010 Matthew Wild
-- Copyright (C) 2008-2010 Waqas Hussain
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--

local string_byte, string_char = string.byte, string.char;
local t_concat, t_insert = table.concat, table.insert;
local type, tonumber, tostring = type, tonumber, tostring;

local file = nil;
local last = nil;
local line = 1;
local function read(expected)
	local ch;
	if last then
		ch = last; last = nil;
	else
		ch = file:read(1);
		if ch == "\n" then line = line + 1; end
	end
	if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil").." on line "..line); end
	return ch;
end
local function pushback(ch)
	if last then error(); end
	last = ch;
end
local function peek()
	if not last then last = read(); end
	return last;
end

local _A, _a, _Z, _z, _0, _9, __, _at, _space, _minus = string_byte("AaZz09@_ -", 1, 10);
local function isLowerAlpha(ch)
	ch = string_byte(ch) or 0;
	return (ch >= _a and ch <= _z);
end
local function isNumeric(ch)
	ch = string_byte(ch) or 0;
	return (ch >= _0 and ch <= _9) or ch == _minus;
end
local function isAtom(ch)
	ch = string_byte(ch) or 0;
	return (ch >= _A and ch <= _Z) or (ch >= _a and ch <= _z) or (ch >= _0 and ch <= _9) or ch == __ or ch == _at;
end
local function isSpace(ch)
	ch = string_byte(ch) or "x";
	return ch <= _space;
end

local escapes = {["\\b"]="\b", ["\\d"]="\127", ["\\e"]="\27", ["\\f"]="\f", ["\\n"]="\n", ["\\r"]="\r", ["\\s"]=" ", ["\\t"]="\t", ["\\v"]="\v", ["\\\""]="\"", ["\\'"]="'", ["\\\\"]="\\"};
local function readString()
	read("\""); -- skip quote
	local slash = nil;
	local str = {};
	while true do
		local ch = read();
		if slash then
			slash = slash..ch;
			if not escapes[slash] then error("Unknown escape sequence: "..slash); end
			str[#str+1] = escapes[slash];
			slash = nil;
		elseif ch == "\"" then
			break;
		elseif ch == "\\" then
			slash = ch;
		else
			str[#str+1] = ch;
		end
	end
	return t_concat(str);
end
local function readAtom1()
	local var = { read() };
	while isAtom(peek()) do
		var[#var+1] = read();
	end
	return t_concat(var);
end
local function readAtom2()
	local str = { read("'") };
	local slash = nil;
	while true do
		local ch = read();
		str[#str+1] = ch;
		if ch == "'" and not slash then break; end
	end
	return t_concat(str);
end
local function readNumber()
	local num = { read() };
	while isNumeric(peek()) do
		num[#num+1] = read();
	end
	if peek() == "." then
		num[#num+1] = read();
		while isNumeric(peek()) do
			num[#num+1] = read();
		end
	end
	return tonumber(t_concat(num));
end
local readItem = nil;
local function readTuple()
	local t = {};
	local s = {}; -- string representation
	read(); -- read {, or [, or <
	while true do
		local item = readItem();
		if not item then break; end
		if type(item) ~= "number" or item > 255 then
			s = nil;
		elseif s then
			s[#s+1] = string_char(item);
		end
		t_insert(t, item);
	end
	read(); -- read }, or ], or >
	if s and #s > 0  then
		return t_concat(s)
	else
		return t
	end;
end
local function readBinary()
	read("<"); -- read <
	-- Discard PIDs
	if isNumeric(peek()) then
		while peek() ~= ">" do read(); end
		read(">");
		return {};
	end
	local t = readTuple();
	read(">") -- read >
	local ch = peek();
	if type(t) == "string" then
		-- binary is a list of integers
		return t;
	elseif type(t) == "table" then
		if t[1] then
			-- binary contains string
			return t[1];
		else
			-- binary is empty
			return "";
		end;
	else
		error();
	end
end
readItem = function()
	local ch = peek();
	if ch == nil then return nil end
	if ch == "{" or ch == "[" then
		return readTuple();
	elseif isLowerAlpha(ch) then
		return readAtom1();
	elseif ch == "'" then
		return readAtom2();
	elseif isNumeric(ch) then
		return readNumber();
	elseif ch == "\"" then
		return readString();
	elseif ch == "<" then
		return readBinary();
	elseif isSpace(ch) or ch == "," or ch == "|" then
		read();
		return readItem();
	else
		--print("Unknown char: "..ch);
		return nil;
	end
end
local function readChunk()
	local x = readItem();
	if x then read("."); end
	return x;
end
local function readFile(filename)
	file = io.open(filename);
	if not file then error("File not found: "..filename); os.exit(0); end
	return function()
		local x = readChunk();
		if not x and peek() then error("Invalid char: "..peek()); end
		return x;
	end;
end

local _M = {};

function _M.parseFile(file)
	return readFile(file);
end

return _M;