util/xmppstream.lua
author Matthew Wild <mwild1@gmail.com>
Sun, 17 Mar 2024 10:10:24 +0000
changeset 13464 a688947fab1e
parent 12979 d10957394a3c
permissions -rw-r--r--
mod_bosh: Set base_type on session. This fixes a traceback with mod_saslauth. Ideally we move this to util.session at some point, though.

-- Prosody IM
-- Copyright (C) 2008-2010 Matthew Wild
-- Copyright (C) 2008-2010 Waqas Hussain
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--

local lxp = require "lxp";
local st = require "prosody.util.stanza";
local stanza_mt = st.stanza_mt;

local error = error;
local tostring = tostring;
local t_insert = table.insert;
local t_concat = table.concat;
local t_remove = table.remove;
local setmetatable = setmetatable;

-- COMPAT: w/LuaExpat 1.1.0
-- Feature probes, evaluated once at load time. The first two are true when
-- lxp.new() accepts a callback table containing the given key without raising;
-- the third is true when a parser object exposes getcurrentbytecount (needed
-- for the stanza size accounting further down).
local lxp_supports_doctype = pcall(lxp.new, { StartDoctypeDecl = false });
local lxp_supports_xmldecl = pcall(lxp.new, { XmlDecl = false });
local lxp_supports_bytecount = not not lxp.new({}).getcurrentbytecount;

-- Limit applied by new() when the caller does not pass one (in bytes)
local default_stanza_size_limit = 1024*1024*1; -- 1MB

-- From here on only locals declared above are reachable; every global that is
-- needed must already have been captured in a local.
local _ENV = nil;
-- luacheck: std none

local new_parser = lxp.new;

-- Attribute keys as reported by the namespace-aware parser (namespace URI,
-- separator, local name) mapped to the prefixed form stored on stanzas.
-- StartElement uses this table to rename matching attributes.
local xml_namespace = {
	["http://www.w3.org/XML/1998/namespace\1lang"] = "xml:lang";
	["http://www.w3.org/XML/1998/namespace\1space"] = "xml:space";
	["http://www.w3.org/XML/1998/namespace\1base"] = "xml:base";
	["http://www.w3.org/XML/1998/namespace\1id"] = "xml:id";
};

-- Default namespace of the stream element when stream_callbacks.stream_ns is not set
local xmlns_streams = "http://etherx.jabber.org/streams";

-- Separator passed to the parser; it sits between namespace and local name in
-- the tag names handed to the handlers.
local ns_separator = "\1";
-- Splits such a tag name into two captures. When the name contains no
-- separator the whole name ends up in the first capture and the second is
-- empty (StartElement swaps them in that case).
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

-- No-op used as the progress callback when new_sax_handlers() is not given one
local function dummy_cb() end

-- Build the table of SAX-style callbacks that turns parser events into
-- stanza objects and stream-level callbacks.
--
-- Parameters:
--   session           - value passed back as first argument to every stream
--                       callback; its `notopen` field is read to decide
--                       whether the next top-level element has to be the
--                       stream header.
--   stream_callbacks  - table providing:
--                         streamopened(session, attr)  (optional)
--                         streamclosed(session)        (optional)
--                         handlestanza(session, stanza) (called for every
--                           completed top-level element that is not a stream
--                           error)
--                         error(session, condition, ...) (optional; the
--                           default raises a Lua error)
--                         stream_ns, stream_tag, error_tag, default_ns
--                           (optional naming overrides)
--   cb_handleprogress - optional function(n_bytes) that receives the byte
--                       counts reported by the parser; defaults to a no-op.
--
-- Returns:
--   the handler table, and a table with `reset()` and
--   `set_session(stream, new_session)`.
local function new_sax_handlers(session, stream_callbacks, cb_handleprogress)
	local xml_handlers = {};

	local cb_streamopened = stream_callbacks.streamopened;
	local cb_streamclosed = stream_callbacks.streamclosed;
	local cb_error = stream_callbacks.error or
		function(_, e, stanza)
			error("XML stream error: "..tostring(e)..(stanza and ": "..tostring(stanza) or ""),2);
		end;
	local cb_handlestanza = stream_callbacks.handlestanza;
	cb_handleprogress = cb_handleprogress or dummy_cb;

	local stream_ns = stream_callbacks.stream_ns or xmlns_streams;
	local stream_tag = stream_callbacks.stream_tag or "stream";
	local stream_error_tag = stream_callbacks.error_tag or "error";
	if stream_ns ~= "" then
		-- Tag names only carry the separator when the element has a namespace,
		-- so both the stream tag and the stream error tag are qualified under
		-- the same condition. (With an empty namespace an unconditional
		-- "\1"-prefixed error tag could never match.)
		stream_tag = stream_ns..ns_separator..stream_tag;
		stream_error_tag = stream_ns..ns_separator..stream_error_tag;
	end

	local stream_default_ns = stream_callbacks.default_ns;

	-- Language applied to stanzas without their own xml:lang; replaced by the
	-- stream header's xml:lang when present
	local stream_lang = "en";

	-- stack: ancestors of the element currently being built
	-- chardata: text pieces collected since the last element boundary
	-- stanza: element currently being built (nil between top-level elements)
	local stack = {};
	local chardata, stanza = {};
	-- Bytes accumulated for the top-level element in progress
	local stanza_size = 0;
	-- Greater than zero while inside an element that is outside the default namespace
	local non_streamns_depth = 0;
	function xml_handlers:StartElement(tagname, attr)
		if stanza and #chardata > 0 then
			-- We have some character data in the buffer
			t_insert(stanza, t_concat(chardata));
			chardata = {};
		end
		local curr_ns,name = tagname:match(ns_pattern);
		if name == "" then
			-- No separator in tagname: the element has no namespace
			curr_ns, name = "", curr_ns;
		end

		-- Only record xmlns explicitly when it differs from the default
		-- namespace, or when already nested inside such an element
		if curr_ns ~= stream_default_ns or non_streamns_depth > 0 then
			attr.xmlns = curr_ns;
			non_streamns_depth = non_streamns_depth + 1;
		end

		-- Drop the array part (attribute names in order) and rename
		-- xml:-namespace attributes to their prefixed form
		for i=1,#attr do
			local k = attr[i];
			attr[i] = nil;
			local xmlk = xml_namespace[k];
			if xmlk then
				attr[xmlk] = attr[k];
				attr[k] = nil;
			end
		end

		if not stanza then --if we are not currently inside a stanza
			if lxp_supports_bytecount then
				stanza_size = self:getcurrentbytecount();
			end
			if session.notopen then
				if tagname == stream_tag then
					non_streamns_depth = 0;
					stream_lang = attr["xml:lang"] or stream_lang;
					if cb_streamopened then
						if lxp_supports_bytecount then
							-- The stream header is fully consumed: report it now
							cb_handleprogress(stanza_size);
							stanza_size = 0;
						end
						cb_streamopened(session, attr);
					end
				else
					-- Garbage before stream?
					cb_error(session, "no-stream", tagname);
				end
				return;
			end
			if curr_ns == "jabber:client" and name ~= "iq" and name ~= "presence" and name ~= "message" then
				cb_error(session, "invalid-top-level-element");
			end

			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
		else -- we are inside a stanza, so add a tag
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			t_insert(stack, stanza);
			local oldstanza = stanza;
			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
			-- The child goes both into the mixed content list and the tags list
			t_insert(oldstanza, stanza);
			t_insert(oldstanza.tags, stanza);
		end
	end

	-- CDATA markers carry no content of their own but still count towards the
	-- byte accounting
	function xml_handlers:StartCdataSection()
		if lxp_supports_bytecount then
			if stanza then
				stanza_size = stanza_size + self:getcurrentbytecount();
			else
				cb_handleprogress(self:getcurrentbytecount());
			end
		end
	end
	function xml_handlers:EndCdataSection()
		if lxp_supports_bytecount then
			if stanza then
				stanza_size = stanza_size + self:getcurrentbytecount();
			else
				cb_handleprogress(self:getcurrentbytecount());
			end
		end
	end
	function xml_handlers:CharacterData(data)
		if stanza then
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			-- Buffered here, flushed into the stanza at the next element boundary
			t_insert(chardata, data);
		elseif lxp_supports_bytecount then
			-- Text between top-level elements is not stored, only accounted for
			cb_handleprogress(self:getcurrentbytecount());
		end
	end
	function xml_handlers:EndElement(tagname)
		if lxp_supports_bytecount then
			stanza_size = stanza_size + self:getcurrentbytecount()
		end
		if non_streamns_depth > 0 then
			non_streamns_depth = non_streamns_depth - 1;
		end
		if stanza then
			if #chardata > 0 then
				-- We have some character data in the buffer
				t_insert(stanza, t_concat(chardata));
				chardata = {};
			end
			-- Complete stanza
			if #stack == 0 then
				if lxp_supports_bytecount then
					cb_handleprogress(stanza_size);
				end
				stanza_size = 0;
				if stanza.attr["xml:lang"] == nil then
					stanza.attr["xml:lang"] = stream_lang;
				end
				if tagname ~= stream_error_tag then
					cb_handlestanza(session, stanza);
				else
					cb_error(session, "stream-error", stanza);
				end
				stanza = nil;
			else
				-- Finished a child element: continue with its parent
				stanza = t_remove(stack);
			end
		else
			-- A closing tag while no stanza is open is treated as the end of the stream
			if lxp_supports_bytecount then
				cb_handleprogress(stanza_size);
			end
			if cb_streamclosed then
				cb_streamclosed(session);
			end
		end
	end

	-- Shared handler for XML features that are not allowed on the stream:
	-- reports the error, then stops the parser (raising if it cannot be stopped)
	local function restricted_handler(parser)
		cb_error(session, "parse-error", "restricted-xml", "Restricted XML, see RFC 6120 section 11.1.");
		if not parser.stop or not parser:stop() then
			error("Failed to abort parsing");
		end
	end

	if lxp_supports_xmldecl then
		function xml_handlers:XmlDecl(version, encoding, standalone)
			if lxp_supports_bytecount then
				cb_handleprogress(self:getcurrentbytecount());
			end
			-- Only XML 1.0 in UTF-8 without standalone="no" is accepted
			if (encoding and encoding:lower() ~= "utf-8")
			or (standalone == "no")
			or (version and version ~= "1.0") then
				return restricted_handler(self);
			end
		end
	end
	if lxp_supports_doctype then
		xml_handlers.StartDoctypeDecl = restricted_handler;
	end
	xml_handlers.Comment = restricted_handler;
	xml_handlers.ProcessingInstruction = restricted_handler;

	-- Discard any partially built stanza and its byte count
	local function reset()
		stanza, chardata, stanza_size = nil, {}, 0;
		stack = {};
	end

	-- Replace the session handed to the callbacks from now on
	local function set_session(stream, new_session) -- luacheck: ignore 212/stream
		session = new_session;
	end

	return xml_handlers, { reset = reset, set_session = set_session };
end

-- Create a stream parser bound to `session`.
--
-- Parameters:
--   session           - session table; an `open_stream` method is installed
--                       on it, and it is passed to the stream callbacks.
--   stream_callbacks  - callback/option table, see new_sax_handlers().
--   stanza_size_limit - optional maximum number of not-yet-accounted bytes
--                       allowed after a feed(); defaults to
--                       default_stanza_size_limit when the parser can report
--                       byte counts. Raises an error if a limit is given but
--                       byte counts are not available.
--
-- Returns a table with:
--   reset()                        - start over with a fresh parser
--   feed(self, data)               - parse a chunk; returns the parser's
--                                    results, or nil, "stanza-too-large"
--   set_session(stream, session)   - swap the session passed to callbacks
--   set_stanza_size_limit(_, n)    - change the size limit
local function new(session, stream_callbacks, stanza_size_limit)
	-- Used to track parser progress (e.g. to enforce size limits)
	local n_outstanding_bytes = 0;
	local handle_progress;
	if lxp_supports_bytecount then
		function handle_progress(n_parsed_bytes)
			n_outstanding_bytes = n_outstanding_bytes - n_parsed_bytes;
		end
		stanza_size_limit = stanza_size_limit or default_stanza_size_limit;
	elseif stanza_size_limit then
		error("Stanza size limits are not supported on this version of LuaExpat")
	end

	local handlers, meta = new_sax_handlers(session, stream_callbacks, handle_progress);
	local parser = new_parser(handlers, ns_separator, false);
	local parse = parser.parse;

	-- Send the XML declaration and opening stream tag through the session's
	-- send function (sends2s is preferred when present)
	function session.open_stream(session, from, to) -- luacheck: ignore 432/session
		local send = session.sends2s or session.send;

		local attr = {
			["xmlns:stream"] = "http://etherx.jabber.org/streams",
			["xml:lang"] = "en",
			xmlns = stream_callbacks.default_ns,
			-- Only advertise a version when the session has a positive one
			version = session.version and (session.version > 0 and "1.0" or nil),
			id = session.streamid or "",
			from = from or session.host, to = to,
		};
		if session.stream_attrs then
			-- Lets the session adjust the attributes before they are sent
			session:stream_attrs(from, to, attr)
		end
		send("<?xml version='1.0'?>"..st.stanza("stream:stream", attr):top_tag());
		return true;
	end

	return {
		reset = function ()
			parser = new_parser(handlers, ns_separator, false);
			parse = parser.parse;
			n_outstanding_bytes = 0;
			meta.reset();
		end,
		feed = function (self, data) -- luacheck: ignore 212/self
			if lxp_supports_bytecount then
				n_outstanding_bytes = n_outstanding_bytes + #data;
			end
			-- Keep a reference: a callback may call reset() during parse()
			local _parser = parser;
			local ok, err = parse(_parser, data);
			if lxp_supports_bytecount and n_outstanding_bytes > stanza_size_limit then
				return nil, "stanza-too-large";
			end
			if parser ~= _parser then
				-- The parser was replaced while parsing this chunk: finish off
				-- and release the old one
				_parser:parse();
				_parser:close();
			end
			return ok, err;
		end,
		set_session = meta.set_session;
		set_stanza_size_limit = function (_, new_stanza_size_limit)
			if lxp_supports_bytecount then
				-- feed() compares against the limit on every call, so it must
				-- never be nil here; fall back to the default like new() does
				stanza_size_limit = new_stanza_size_limit or default_stanza_size_limit;
			else
				stanza_size_limit = new_stanza_size_limit;
			end
		end;
	};
end

-- Module exports
return {
	-- Separator between namespace and local name in parser tag names
	ns_separator = ns_separator;
	-- Pattern splitting such a tag name into namespace and local name
	ns_pattern = ns_pattern;
	-- Builds the parser callback table without creating a parser
	new_sax_handlers = new_sax_handlers;
	-- Creates a complete stream parser for a session
	new = new;
};