# HG changeset patch
# User Kim Alvefur
# Date 1669157761 -3600
# Node ID be09ac8300a7bde4e81f7cc4e4ee5b0745ab14b7
# Parent 458c5f8d5d3e801291515810012ccc2277851089
util.stanza: Allow U+7F

Allowed by XML despite arguably being a control character.

Drops the part of the range meant to rule out octets invalid in UTF-8
(\247 starts a 4-byte sequence), since UTF-8 correctness is validated by
util.encodings.utf8.valid().

diff -r 458c5f8d5d3e -r be09ac8300a7 util/stanza.lua
--- a/util/stanza.lua	Mon Jan 24 13:58:04 2022 +0000
+++ b/util/stanza.lua	Tue Nov 22 23:56:01 2022 +0100
@@ -45,8 +45,12 @@
 local stanza_mt = { __name = "stanza" };
 stanza_mt.__index = stanza_mt;
 
+-- Basic check for valid XML character data.
+-- Disallow control characters.
+-- Tab U+09 and newline U+0A are allowed.
+-- For attributes, allow the \1 separator between namespace and name.
 local function valid_xml_cdata(str, attr)
-	return not s_find(str, attr and "[^\1\9\10\13\20-~\128-\247]" or "[^\9\10\13\20-~\128-\247]");
+	return not s_find(str, attr and "[^\1\9\10\13\20-\255]" or "[^\9\10\13\20-\255]");
 end
 
 local function check_name(name, name_type)