util/xml.lua
author Matthew Wild <mwild1@gmail.com>
Wed, 27 Mar 2024 15:35:15 +0000
branch0.12
changeset 13469 54a936345aaa
parent 12274 c78639ee6ccb
child 12979 d10957394a3c
permissions -rw-r--r--
prosodyctl check: Warn about invalid domain names in the config file This ensures that domain names of virtual hosts and components are valid in XMPP, and that they are encoded correctly.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     1
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     2
local st = require "util.stanza";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     3
local lxp = require "lxp";
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
     4
local t_insert = table.insert;
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
     5
local t_remove = table.remove;
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
     6
local error = error;
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     7
6780
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
     8
local _ENV = nil;
8558
4f0f5b49bb03 vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents: 7242
diff changeset
     9
-- luacheck: std none
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    10
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    11
-- Builds the string-to-stanza XML parser that this module exports as parse().
-- The namespace lookup table, the separator and the split pattern are created
-- once here and shared by every call of the returned function.
local parse_xml = (function()
	-- Namespace URIs whose attributes are rewritten to a "prefix:name" key.
	local ns_prefixes = {
		["http://www.w3.org/XML/1998/namespace"] = "xml";
	};
	-- Separator passed to lxp.new(); qualified names are split on it below.
	local ns_separator = "\1";
	-- Captures the text before the separator and the text after it.
	-- When no separator is present the second capture is the empty string.
	local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

	-- Parse an XML document held in a string.
	-- @param xml      string containing the XML document
	-- @param options  optional table; recognised fields:
	--                   allow_comments: when truthy, XML comments do not abort parsing
	--                   allow_processing_instructions: when truthy, processing
	--                     instructions do not abort parsing
	-- @return on success: the first child tag of the internal "root" stanza
	-- @return on failure: the falsy result of the parser, followed by an error
	--         string that includes the line and column reported by the parser
	return function(xml, options)
		--luacheck: ignore 212/self
		local handler = {};
		-- Synthetic container; the parsed document element becomes its first tag.
		local stanza = st.stanza("root");
		-- Parallel stacks of the namespace declarations currently in scope.
		local namespaces = {};
		local prefixes = {};

		-- Push a prefixed namespace declaration (declarations without a prefix are not tracked).
		function handler:StartNamespaceDecl(prefix, url)
			if prefix ~= nil then
				t_insert(namespaces, url);
				t_insert(prefixes, prefix);
			end
		end

		-- Pop the most recent prefixed namespace declaration.
		function handler:EndNamespaceDecl(prefix)
			if prefix ~= nil then
				-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
				t_remove(namespaces);
				t_remove(prefixes);
			end
		end

		-- Open a new child tag, normalising the element namespace and attribute keys.
		function handler:StartElement(tagname, attr)
			local curr_ns, name = tagname:match(ns_pattern);
			if name == "" then
				-- No separator in the name: the first capture is the element name.
				curr_ns, name = "", curr_ns;
			end
			if curr_ns ~= "" then
				attr.xmlns = curr_ns;
			end
			-- attr holds the attribute names in its array part and the values
			-- keyed by name; drop the array part and rewrite known namespaces.
			for i = 1, #attr do
				local k = attr[i];
				attr[i] = nil;
				local ns, nm = k:match(ns_pattern);
				if nm ~= "" then
					ns = ns_prefixes[ns];
					if ns then
						attr[ns..":"..nm] = attr[k];
						attr[k] = nil;
					end
				end
			end
			-- Snapshot the prefixes in scope; later declarations of the same
			-- prefix overwrite earlier ones because they are visited last.
			local n = {};
			for i = 1, #namespaces do
				n[prefixes[i]] = namespaces[i];
			end
			stanza:tag(name, attr, n);
		end

		-- Append character data to the tag currently being built.
		function handler:CharacterData(data)
			stanza:text(data);
		end

		-- Close the tag currently being built.
		function handler:EndElement()
			stanza:up();
		end

		-- SECURITY: Restricted constructs abort parsing. Rejecting doctype
		-- declarations in particular is required to prevent exploits such as
		-- Billion Laughs.
		local function restricted_handler(parser)
			-- Raise if the parser cannot be stopped, so parsing never continues silently.
			if not parser.stop or not parser:stop() then
				error("Failed to abort parsing");
			end
		end

		-- Doctype declarations are always rejected; there is no option to allow them.
		handler.StartDoctypeDecl = restricted_handler;
		if not options or not options.allow_comments then
			-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
			handler.Comment = restricted_handler;
		end
		if not options or not options.allow_processing_instructions then
			-- Processing instructions are rejected unless explicitly allowed,
			-- although they should generally be safe to just ignore.
			handler.ProcessingInstruction = restricted_handler;
		end

		local parser = lxp.new(handler, ns_separator);
		local ok, err, line, col = parser:parse(xml);
		-- A call without arguments signals the end of the document to the parser.
		if ok then ok, err, line, col = parser:parse(); end
		--parser:close();
		if ok then
			return stanza.tags[1];
		else
			return ok, ("%s (line %d, col %d)"):format(err, line, col);
		end
	end;
end)();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    94
6780
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
    95
-- Public interface of this module: parse() converts an XML string to a stanza.
return {
	parse = parse_xml,
};