spec/util_xml_spec.lua
author Kim Alvefur <zash@zash.se>
Tue, 22 Nov 2022 23:56:01 +0100
branch 0.11
changeset 12801 be09ac8300a7
parent 12185 783056b4e448
child 12274 c78639ee6ccb
permissions -rw-r--r--
util.stanza: Allow U+7F. Allowed by XML despite arguably being a control character. Drops the part of the range meant to rule out octets invalid in UTF-8 (\247 starts a 4-byte sequence), since UTF-8 correctness is validated by util.encodings.utf8.valid().


local xml = require "util.xml";

-- Spec for xml.parse(): checks namespace prefix scoping, the inputs that
-- must be refused (doctypes, processing instructions and, unless explicitly
-- enabled, comments) and the inputs that must be accepted.
--
-- All equality assertions use luassert's (expected, actual) argument order so
-- that failure output labels the two values correctly.
describe("util.xml", function()
	describe("#parse()", function()
		-- Namespace scoping regression test: a nested element redeclares the
		-- prefix 'a'; once that element is closed, the prefix must resolve to
		-- the outer declaration ("b") again.
		it("should work", function()
			local x =
[[<x xmlns:a="b">
	<y xmlns:a="c"> <!-- this overwrites 'a' -->
	    <a:z/>
	</y>
	<a:z/> <!-- prefix 'a' is nil here, but should be 'b' -->
</x>
]]
			-- The fixture contains comments, so they have to be enabled.
			local stanza = xml.parse(x, {allow_comments = true});
			-- Guard first: a failed parse should show up as a clear assertion
			-- failure instead of an "attempt to index a nil value" error.
			assert.truthy(stanza);
			-- Second child tag of <x>; per the fixture this is the <a:z/> that
			-- follows </y>, i.e. the one outside the overriding scope.
			local outer_z = stanza.tags[2];
			assert.truthy(outer_z);
			assert.are.equal("b", outer_z.attr.xmlns);
			assert.are.equal("b", outer_z.namespaces["a"]);
		end);

		-- A document type declaration must make xml.parse return a falsy value.
		it("should reject doctypes", function()
			local x = "<!DOCTYPE foo []><foo/>";
			local parsed = xml.parse(x);
			assert.falsy(parsed);
		end);

		-- Without the allow_comments option, a comment must make xml.parse
		-- return a falsy value.
		it("should reject comments by default", function()
			local x = "<foo><!-- foo --></foo>";
			local parsed = xml.parse(x);
			assert.falsy(parsed);
		end);

		-- With allow_comments set, the same input parses, and the comment
		-- contributes nothing to the resulting element (its length stays 0).
		it("should allow comments if asked nicely", function()
			local x = "<foo><!-- foo --></foo>";
			local stanza = xml.parse(x, {allow_comments = true});
			-- Same guard as above: fail readably if parsing was refused.
			assert.truthy(stanza);
			assert.are.equal("foo", stanza.name);
			assert.are.equal(0, #stanza);
		end);

		-- A processing instruction inside the document must make xml.parse
		-- return a falsy value.
		it("should reject processing instructions", function()
			local x = "<foo><?php die(); ?></foo>";
			local parsed = xml.parse(x);
			assert.falsy(parsed);
		end);

		-- The leading <?xml ...?> declaration is the exception: it is
		-- accepted and the root element is still returned.
		it("should allow an xml declaration", function()
			local x = "<?xml version='1.0'?><foo/>";
			local stanza = xml.parse(x);
			assert.truthy(stanza);
			assert.are.equal("foo", stanza.name);
		end);
	end);
end);