author | Kim Alvefur <zash@zash.se> |
Tue, 23 Apr 2024 20:01:41 +0200 | |
changeset 13486 | 4d697961546d |
parent 12979 | d10957394a3c |
permissions | -rw-r--r-- |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
1 |
|
12979
d10957394a3c
util: Prefix module imports with prosody namespace
Kim Alvefur <zash@zash.se>
parents:
12274
diff
changeset
|
2 |
local st = require "prosody.util.stanza"; |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
3 |
local lxp = require "lxp"; |
7242
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
4 |
local t_insert = table.insert; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
5 |
local t_remove = table.remove; |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
6 |
local error = error; |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
7 |
|
6780
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6672
diff
changeset
|
8 |
local _ENV = nil; |
8558
4f0f5b49bb03
vairious: Add annotation when an empty environment is set [luacheck]
Kim Alvefur <zash@zash.se>
parents:
7242
diff
changeset
|
9 |
-- luacheck: std none |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
10 |
|
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
11 |
local parse_xml = (function() |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
12 |
local ns_prefixes = { |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
13 |
["http://www.w3.org/XML/1998/namespace"] = "xml"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
14 |
}; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
15 |
local ns_separator = "\1"; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
16 |
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$"; |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
17 |
return function(xml, options) |
6666
d3023dd07cb6
portmanager, s2smanager, sessionmanager, stanza_router, storagemanager, usermanager, util.xml: Add luacheck annotations
Matthew Wild <mwild1@gmail.com>
parents:
5776
diff
changeset
|
18 |
--luacheck: ignore 212/self |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
19 |
local handler = {}; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
20 |
local stanza = st.stanza("root"); |
7242
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
21 |
local namespaces = {}; |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
22 |
local prefixes = {}; |
6981
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
23 |
function handler:StartNamespaceDecl(prefix, url) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
24 |
if prefix ~= nil then |
7242
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
25 |
t_insert(namespaces, url); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
26 |
t_insert(prefixes, prefix); |
6981
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
27 |
end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
28 |
end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
29 |
function handler:EndNamespaceDecl(prefix) |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
30 |
if prefix ~= nil then |
7242
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
31 |
-- we depend on each StartNamespaceDecl having a paired EndNamespaceDecl |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
32 |
t_remove(namespaces); |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
33 |
t_remove(prefixes); |
6981
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
34 |
end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
35 |
end |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
36 |
function handler:StartElement(tagname, attr) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
37 |
local curr_ns,name = tagname:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
38 |
if name == "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
39 |
curr_ns, name = "", curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
40 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
41 |
if curr_ns ~= "" then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
42 |
attr.xmlns = curr_ns; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
43 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
44 |
for i=1,#attr do |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
45 |
local k = attr[i]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
46 |
attr[i] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
47 |
local ns, nm = k:match(ns_pattern); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
48 |
if nm ~= "" then |
5776
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
49 |
ns = ns_prefixes[ns]; |
bd0ff8ae98a8
Remove all trailing whitespace
Florian Zeitz <florob@babelmonkeys.de>
parents:
5223
diff
changeset
|
50 |
if ns then |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
51 |
attr[ns..":"..nm] = attr[k]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
52 |
attr[k] = nil; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
53 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
54 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
55 |
end |
6981
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
56 |
local n = {} |
7242
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
57 |
for i=1,#namespaces do |
c9af793b2d8f
util.xml: Correct stanza.namespaces table construction when duplicate prefix names are encountered in the element tree.
Waqas Hussain <waqas20@gmail.com>
parents:
6981
diff
changeset
|
58 |
n[prefixes[i]] = namespaces[i]; |
6981
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
59 |
end |
30c96a5db360
util.stanza, util.xml, util.xmppstream: Add support for tracking defined namespaces and their prefix (stanza.namespaces), knowing/preserving prefix names is required for some applications (thanks daurnimator)
Matthew Wild <mwild1@gmail.com>
parents:
6780
diff
changeset
|
60 |
stanza:tag(name, attr, n); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
61 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
62 |
function handler:CharacterData(data) |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
63 |
stanza:text(data); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
64 |
end |
6672
7da8b6bc0966
util.xml: Remove unused parameter (thanks, luacheck)
Matthew Wild <mwild1@gmail.com>
parents:
6666
diff
changeset
|
65 |
function handler:EndElement() |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
66 |
stanza:up(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
67 |
end |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
68 |
-- SECURITY: These two handlers, especially the Doctype one, are required to prevent exploits such as Billion Laughs. |
12206
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12205
diff
changeset
|
69 |
local function restricted_handler(parser) |
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12205
diff
changeset
|
70 |
if not parser.stop or not parser:stop() then |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
71 |
error("Failed to abort parsing"); |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
72 |
end |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
73 |
end |
12206
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12205
diff
changeset
|
74 |
handler.StartDoctypeDecl = restricted_handler; |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
75 |
if not options or not options.allow_comments then |
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
76 |
-- NOTE: comments are generally harmless and can be useful when parsing configuration files or other data, even user-provided data |
12206
ebeb4d959fb3
util.xml: Deduplicate handlers for restricted XML
Kim Alvefur <zash@zash.se>
parents:
12205
diff
changeset
|
77 |
handler.Comment = restricted_handler; |
12185
783056b4e448
util.xml: Do not allow doctypes, comments or processing instructions
Jonas Schäfer <jonas@wielicki.name>
parents:
8558
diff
changeset
|
78 |
end |
12274
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12207
diff
changeset
|
79 |
if not options or not options.allow_processing_instructions then |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12207
diff
changeset
|
80 |
-- Processing instructions should generally be safe to just ignore |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12207
diff
changeset
|
81 |
handler.ProcessingInstruction = restricted_handler; |
c78639ee6ccb
util.xml: Add an option to allow <?processing instructions?>
Kim Alvefur <zash@zash.se>
parents:
12207
diff
changeset
|
82 |
end |
12205
e5e0ab93d7f4
util.xml: Break reference to help the GC (fix #1711)
Kim Alvefur <zash@zash.se>
parents:
12185
diff
changeset
|
83 |
local parser = lxp.new(handler, ns_separator); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
84 |
local ok, err, line, col = parser:parse(xml); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
85 |
if ok then ok, err, line, col = parser:parse(); end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
86 |
--parser:close(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
87 |
if ok then |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
88 |
return stanza.tags[1]; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
89 |
else |
11131
1d9cd1abc660
util.xml: Fix float formatting of line and columns in error (on Lua 5.3+)
Kim Alvefur <zash@zash.se>
parents:
8558
diff
changeset
|
90 |
return ok, ("%s (line %d, col %d))"):format(err, line, col); |
5213
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
91 |
end |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
92 |
end; |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
93 |
end)(); |
cc487921746b
util.xml: Initial commit; exposes parse(), which is now the canonical way to convert a string to a stanza.
Waqas Hussain <waqas20@gmail.com>
parents:
diff
changeset
|
94 |
|
6780
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6672
diff
changeset
|
95 |
return { |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6672
diff
changeset
|
96 |
parse = parse_xml; |
5de6b93d0190
util.*: Remove use of module() function, make all module functions local and return them in a table at the end
Kim Alvefur <zash@zash.se>
parents:
6672
diff
changeset
|
97 |
}; |