util/xml.lua
author Kim Alvefur <zash@zash.se>
Thu, 20 Jan 2022 10:51:46 +0100
branch0.11
changeset 12206 ebeb4d959fb3
parent 12205 e5e0ab93d7f4
child 12207 320de3e4b579
permissions -rw-r--r--
util.xml: Deduplicate handlers for restricted XML Makes the code more like util.xmppstream, allowing easier comparisons if we ever need to apply fixes in the future.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     1
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     2
local st = require "util.stanza";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     3
local lxp = require "lxp";
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
     4
local t_insert = table.insert;
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
     5
local t_remove = table.remove;
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
     6
local error = error;
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
     7
6780
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
     8
local _ENV = nil;
8558
4f0f5b49bb03 vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents: 7242
diff changeset
     9
-- luacheck: std none
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    10
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    11
local parse_xml = (function()
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    12
	local ns_prefixes = {
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    13
		["http://www.w3.org/XML/1998/namespace"] = "xml";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    14
	};
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    15
	local ns_separator = "\1";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    16
	local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    17
	return function(xml, options)
6666
d3023dd07cb6 portmanager, s2smanager, sessionmanager, stanza_router, storagemanager, usermanager, util.xml: Add luacheck annotations
Matthew Wild <mwild1@gmail.com>
parents: 5776
diff changeset
    18
		--luacheck: ignore 212/self
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    19
		local handler = {};
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    20
		local stanza = st.stanza("root");
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    21
		local namespaces = {};
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    22
		local prefixes = {};
6981
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    23
		function handler:StartNamespaceDecl(prefix, url)
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    24
			if prefix ~= nil then
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    25
				t_insert(namespaces, url);
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    26
				t_insert(prefixes, prefix);
6981
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    27
			end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    28
		end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    29
		function handler:EndNamespaceDecl(prefix)
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    30
			if prefix ~= nil then
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    31
				-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    32
				t_remove(namespaces);
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    33
				t_remove(prefixes);
6981
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    34
			end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    35
		end
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    36
		function handler:StartElement(tagname, attr)
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    37
			local curr_ns,name = tagname:match(ns_pattern);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    38
			if name == "" then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    39
				curr_ns, name = "", curr_ns;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    40
			end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    41
			if curr_ns ~= "" then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    42
				attr.xmlns = curr_ns;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    43
			end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    44
			for i=1,#attr do
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    45
				local k = attr[i];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    46
				attr[i] = nil;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    47
				local ns, nm = k:match(ns_pattern);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    48
				if nm ~= "" then
5776
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5223
diff changeset
    49
					ns = ns_prefixes[ns];
bd0ff8ae98a8 Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents: 5223
diff changeset
    50
					if ns then
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    51
						attr[ns..":"..nm] = attr[k];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    52
						attr[k] = nil;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    53
					end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    54
				end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    55
			end
6981
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    56
			local n = {}
7242
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    57
			for i=1,#namespaces do
c9af793b2d8f util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents: 6981
diff changeset
    58
				n[prefixes[i]] = namespaces[i];
6981
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    59
			end
30c96a5db360 util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents: 6780
diff changeset
    60
			stanza:tag(name, attr, n);
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    61
		end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    62
		function handler:CharacterData(data)
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    63
			stanza:text(data);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    64
		end
6672
7da8b6bc0966 util.xml: Remove unused parameter (thanks, luacheck)
Matthew Wild <mwild1@gmail.com>
parents: 6666
diff changeset
    65
		function handler:EndElement()
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    66
			stanza:up();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    67
		end
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    68
		-- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs.
12206
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12205
diff changeset
    69
		local function restricted_handler(parser)
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12205
diff changeset
    70
			if not parser.stop or not parser:stop() then
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    71
				error("Failed to abort parsing");
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    72
			end
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    73
		end
12206
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12205
diff changeset
    74
		handler.StartDoctypeDecl = restricted_handler;
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12205
diff changeset
    75
		handler.ProcessingInstruction = restricted_handler;
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    76
		if not options or not options.allow_comments then
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    77
			-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data
12206
ebeb4d959fb3 util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents: 12205
diff changeset
    78
			handler.Comment = restricted_handler;
12185
783056b4e448 util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents: 8558
diff changeset
    79
		end
12205
e5e0ab93d7f4 util.xml: Break reference to help the GC (fix #1711)
Kim Alvefur <zash@zash.se>
parents: 12185
diff changeset
    80
		local parser = lxp.new(handler, ns_separator);
5213
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    81
		local ok, err, line, col = parser:parse(xml);
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    82
		if ok then ok, err, line, col = parser:parse(); end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    83
		--parser:close();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    84
		if ok then
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    85
			return stanza.tags[1];
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    86
		else
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    87
			return ok, err.." (line "..line..", col "..col..")";
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    88
		end
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    89
	end;
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    90
end)();
cc487921746b util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff changeset
    91
6780
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
    92
return {
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
    93
	parse = parse_xml;
5de6b93d0190 util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents: 6672
diff changeset
    94
};