teal-src/prosody/util/datamapper.tl
changeset 12983 fbbf4f0db8f0
parent 12786 8815d3090928
equal deleted inserted replaced
12982:088d278c75b5 12983:fbbf4f0db8f0
       
     1 -- Copyright (C) 2021 Kim Alvefur
       
     2 --
       
     3 -- This project is MIT/X11 licensed. Please see the
       
     4 -- COPYING file in the source package for more information.
       
     5 --
       
     6 -- Based on
       
     7 -- https://json-schema.org/draft/2020-12/json-schema-core.html
       
     8 -- https://json-schema.org/draft/2020-12/json-schema-validation.html
       
     9 -- http://spec.openapis.org/oas/v3.0.1#xmlObject
       
    10 -- https://github.com/OAI/OpenAPI-Specification/issues/630 (text:true)
       
    11 --
       
    12 -- XML Object Extensions:
       
    13 -- text to refer to the text content at the same time as attributes
       
    14 -- x_name_is_value for enum fields where the <tag-name/> is the value
       
    15 -- x_single_attribute for <tag attr="this"/>
       
    16 --
       
    17 -- TODO pointers
       
    18 -- TODO cleanup / refactor
       
    19 -- TODO s/number/integer/ once we have appropriate math.type() compat
       
    20 --
       
    21 
       
    22 if not math.type then require "prosody.util.mathcompat" end
       
    23 
       
    24 local st = require "prosody.util.stanza";
       
    25 local json = require "prosody.util.json"
       
    26 local pointer = require "prosody.util.jsonpointer";
       
    27 
       
    28 local json_type_name = json.json_type_name;
       
    29 local json_schema_object = require "prosody.util.jsonschema"
       
    30 local type schema_t = boolean | json_schema_object
       
    31 
       
    32 local function toboolean ( s : string ) : boolean
       
    33 	if s == "true" or s == "1" then
       
    34 		return true
       
    35 	elseif s == "false" or s == "0" then
       
    36 		return false
       
    37 	elseif s then
       
    38 		return true
       
    39 	end
       
    40 end
       
    41 
       
    42 local function totype(t : json_type_name, s : string) : any
       
    43 	if not s then return nil end
       
    44 	if t == "string" then
       
    45 		return s;
       
    46 	elseif t == "boolean" then
       
    47 		return toboolean(s)
       
    48 	elseif t == "number" or t == "integer" then
       
    49 		return tonumber(s)
       
    50 	end
       
    51 end
       
    52 
       
    53 local enum value_goes
       
    54 	"in_tag_name"
       
    55 	"in_text"
       
    56 	"in_text_tag"
       
    57 	"in_attribute"
       
    58 	"in_single_attribute"
       
    59 	"in_children"
       
    60 	"in_wrapper"
       
    61 end
       
    62 
       
    63 local function resolve_schema(schema  : schema_t, root : json_schema_object) : schema_t
       
    64 	if schema is json_schema_object then
       
    65 		if schema["$ref"] and schema["$ref"]:sub(1, 1) == "#" then
       
    66 			return pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t;
       
    67 		end
       
    68 	end
       
    69 	return schema;
       
    70 end
       
    71 
       
    72 local function guess_schema_type(schema : json_schema_object) : json_type_name
       
    73 	local schema_types = schema.type
       
    74 	if schema_types is json_type_name then
       
    75 		return schema_types
       
    76 	elseif schema_types ~= nil then
       
    77 		error "schema has unsupported 'type' property"
       
    78 	elseif schema.properties then
       
    79 		return "object"
       
    80 	elseif schema.items then
       
    81 		return "array"
       
    82 	end
       
    83 	return "string" -- default assumption
       
    84 end
       
    85 
       
    86 local function unpack_propschema( propschema : schema_t, propname : string, current_ns : string )
       
    87 		: json_type_name, value_goes, string, string, string, string, { any }
       
    88 	local proptype : json_type_name = "string"
       
    89 	local value_where : value_goes = propname and "in_text_tag" or "in_text"
       
    90 	local name = propname
       
    91 	local namespace : string
       
    92 	local prefix : string
       
    93 	local single_attribute : string
       
    94 	local enums : { any }
       
    95 
       
    96 	if propschema is json_schema_object then
       
    97 		proptype = guess_schema_type(propschema);
       
    98 	elseif propschema is string then -- Teal says this can never be a string, but it could before so best be sure
       
    99 		error("schema as string is not supported: "..propschema.." {"..current_ns.."}"..propname)
       
   100 	end
       
   101 
       
   102 	if proptype == "object" or proptype == "array" then
       
   103 		value_where = "in_children"
       
   104 	end
       
   105 
       
   106 	if propschema is json_schema_object then
       
   107 		local xml = propschema.xml
       
   108 		if xml then
       
   109 			if xml.name then
       
   110 				name = xml.name
       
   111 			end
       
   112 			if xml.namespace and xml.namespace ~= current_ns then
       
   113 				namespace = xml.namespace
       
   114 			end
       
   115 			if xml.prefix then
       
   116 				prefix = xml.prefix
       
   117 			end
       
   118 			if proptype == "array" and xml.wrapped then
       
   119 				value_where = "in_wrapper"
       
   120 			elseif xml.attribute then
       
   121 				value_where = "in_attribute"
       
   122 			elseif xml.text then
       
   123 				value_where = "in_text"
       
   124 			elseif xml.x_name_is_value then
       
   125 				value_where = "in_tag_name"
       
   126 			elseif xml.x_single_attribute then
       
   127 				single_attribute = xml.x_single_attribute
       
   128 				value_where = "in_single_attribute"
       
   129 			end
       
   130 		end
       
   131 		if propschema["const"] then
       
   132 			enums = { propschema["const"] }
       
   133 		elseif propschema["enum"] then
       
   134 			enums = propschema["enum"]
       
   135 		end
       
   136 	end
       
   137 
       
   138 	return proptype, value_where, name, namespace, prefix, single_attribute, enums
       
   139 end
       
   140 
       
   141 local parse_object : function (schema : schema_t, s : st.stanza_t, root : json_schema_object) : { string : any }
       
   142 local parse_array : function (schema : schema_t, s : st.stanza_t, root : json_schema_object) : { any }
       
   143 
       
   144 local function extract_value (s : st.stanza_t, value_where : value_goes, proptype : json.json_type_name, name : string, namespace : string, prefix : string, single_attribute : string, enums : { any }) : string
       
   145 	if value_where == "in_tag_name" then
       
   146 		local c : st.stanza_t
       
   147 		if proptype == "boolean" then
       
   148 			c = s:get_child(name, namespace);
       
   149 		elseif enums and proptype == "string" then
       
   150 			-- XXX O(n²) ?
       
   151 			-- Probably better to flip the table and loop over :childtags(nil, ns), should be 2xO(n)
       
   152 			-- BUT works first, optimize later
       
   153 			for i = 1, #enums do
       
   154 				c = s:get_child(enums[i] as string, namespace);
       
   155 				if c then break end
       
   156 			end
       
   157 		else
       
   158 			c = s:get_child(nil, namespace);
       
   159 		end
       
   160 		if c then
       
   161 			return c.name;
       
   162 		end
       
   163 	elseif value_where == "in_attribute" then
       
   164 		local attr = name
       
   165 		if prefix then
       
   166 			attr = prefix .. ':' .. name
       
   167 		elseif namespace and namespace ~= s.attr.xmlns then
       
   168 			attr = namespace .. "\1" .. name
       
   169 		end
       
   170 		return s.attr[attr]
       
   171 
       
   172 	elseif value_where == "in_text" then
       
   173 		return s:get_text()
       
   174 
       
   175 	elseif value_where == "in_single_attribute" then
       
   176 		local c = s:get_child(name, namespace)
       
   177 		return c and c.attr[single_attribute]
       
   178 	elseif value_where == "in_text_tag" then
       
   179 		return s:get_child_text(name, namespace)
       
   180 	end
       
   181 end
       
   182 
       
   183 function parse_object (schema : schema_t, s : st.stanza_t, root : json_schema_object) : { string : any }
       
   184 	local out : { string : any } = {}
       
   185 	schema = resolve_schema(schema, root)
       
   186 	if schema is json_schema_object and schema.properties then
       
   187 		for prop, propschema in pairs(schema.properties) do
       
   188 			propschema = resolve_schema(propschema, root)
       
   189 
       
   190 			local proptype, value_where, name, namespace, prefix, single_attribute, enums = unpack_propschema(propschema, prop, s.attr.xmlns)
       
   191 
       
   192 			if value_where == "in_children" and propschema is json_schema_object then
       
   193 				if proptype == "object" then
       
   194 					local c = s:get_child(name, namespace)
       
   195 					if c then
       
   196 						out[prop] = parse_object(propschema, c, root);
       
   197 					end
       
   198 				elseif proptype == "array" then
       
   199 					local a = parse_array(propschema, s, root);
       
   200 					if a and a[1] ~= nil then
       
   201 						out[prop] = a;
       
   202 					end
       
   203 				else
       
   204 					error "unreachable"
       
   205 				end
       
   206 			elseif value_where == "in_wrapper" and propschema is json_schema_object and proptype == "array" then
       
   207 				local wrapper = s:get_child(name, namespace);
       
   208 				if wrapper then
       
   209 					out[prop] = parse_array(propschema, wrapper, root);
       
   210 				end
       
   211 			else
       
   212 				local value : string = extract_value (s, value_where, proptype, name, namespace, prefix, single_attribute, enums)
       
   213 
       
   214 				out[prop] = totype(proptype, value)
       
   215 			end
       
   216 		end
       
   217 	end
       
   218 
       
   219 	return out
       
   220 end
       
   221 
       
   222 function parse_array (schema : json_schema_object, s : st.stanza_t, root : json_schema_object) : { any }
       
   223 	local itemschema : schema_t = resolve_schema(schema.items, root);
       
   224 	local proptype, value_where, child_name, namespace, prefix, single_attribute, enums = unpack_propschema(itemschema, nil, s.attr.xmlns)
       
   225 	local attr_name : string
       
   226 	if value_where == "in_single_attribute" then -- FIXME this shouldn't be needed
       
   227 		value_where = "in_attribute";
       
   228 		attr_name = single_attribute;
       
   229 	end
       
   230 	local out : { any } = {}
       
   231 
       
   232 	if proptype == "object" then
       
   233 		if itemschema is json_schema_object then
       
   234 			for c in s:childtags(child_name, namespace) do
       
   235 				table.insert(out, parse_object(itemschema, c, root));
       
   236 			end
       
   237 		else
       
   238 			error "array items must be schema object"
       
   239 		end
       
   240 	elseif proptype == "array" then
       
   241 		if itemschema is json_schema_object then
       
   242 			for c in s:childtags(child_name, namespace) do
       
   243 				table.insert(out, parse_array(itemschema, c, root));
       
   244 			end
       
   245 		end
       
   246 	else
       
   247 		for c in s:childtags(child_name, namespace) do
       
   248 			local value : string = extract_value (c, value_where, proptype, attr_name or child_name, namespace, prefix, single_attribute, enums)
       
   249 
       
   250 			table.insert(out, totype(proptype, value));
       
   251 		end
       
   252 	end
       
   253 	return out;
       
   254 end
       
   255 
       
   256 local function parse (schema : json_schema_object, s : st.stanza_t) : table
       
   257 	local s_type = guess_schema_type(schema)
       
   258 	if s_type == "object" then
       
   259 		return parse_object(schema, s, schema)
       
   260 	elseif s_type == "array" then
       
   261 		return parse_array(schema, s, schema)
       
   262 	else
       
   263 		error "top-level scalars unsupported"
       
   264 	end
       
   265 end
       
   266 
       
   267 local function toxmlstring(proptype : json_type_name, v : any) : string
       
   268 	if proptype == "string" and v is string then
       
   269 		return  v
       
   270 	elseif proptype == "number" and v is number then
       
   271 		return  string.format("%g", v)
       
   272 	elseif proptype == "integer" and v is number then -- TODO is integer
       
   273 		return  string.format("%d", v)
       
   274 	elseif proptype == "boolean" then
       
   275 		return  v and "1" or "0"
       
   276 	end
       
   277 end
       
   278 
       
   279 local unparse : function (json_schema_object, table, string, string, st.stanza_t, json_schema_object) : st.stanza_t
       
   280 
       
   281 local function unparse_property(out : st.stanza_t, v : any, proptype : json_type_name, propschema : schema_t, value_where : value_goes, name : string, namespace : string, current_ns : string, prefix : string, single_attribute : string, root : json_schema_object)
       
   282 
       
   283 	if value_where == "in_attribute" then
       
   284 		local attr = name
       
   285 		if prefix then
       
   286 			attr = prefix .. ':' .. name
       
   287 		elseif namespace and namespace ~= current_ns then
       
   288 			attr = namespace .. "\1" .. name
       
   289 		end
       
   290 
       
   291 		out.attr[attr] = toxmlstring(proptype, v)
       
   292 	elseif value_where == "in_text" then
       
   293 		out:text(toxmlstring(proptype, v))
       
   294 	elseif value_where == "in_single_attribute" then
       
   295 		assert(single_attribute)
       
   296 		local propattr : { string : string } = {}
       
   297 
       
   298 		if namespace and namespace ~= current_ns then
       
   299 			propattr.xmlns = namespace
       
   300 		end
       
   301 
       
   302 		propattr[single_attribute] = toxmlstring(proptype, v)
       
   303 		out:tag(name, propattr):up();
       
   304 
       
   305 	else
       
   306 		local propattr : { string : string }
       
   307 		if namespace ~= current_ns then
       
   308 			propattr = { xmlns = namespace }
       
   309 		end
       
   310 		if value_where == "in_tag_name" then
       
   311 			if proptype == "string" and v is string then
       
   312 				out:tag(v, propattr):up();
       
   313 			elseif proptype == "boolean" and v == true then
       
   314 				out:tag(name, propattr):up();
       
   315 			end
       
   316 		elseif proptype == "object" and propschema is json_schema_object and v is table then
       
   317 			local c = unparse(propschema, v, name, namespace, nil, root);
       
   318 			if c then
       
   319 				out:add_direct_child(c);
       
   320 			end
       
   321 		elseif proptype == "array" and propschema is json_schema_object and v is table then
       
   322 			if value_where == "in_wrapper" then
       
   323 				local c = unparse(propschema, v, name, namespace, nil, root);
       
   324 				if c then
       
   325 					out:add_direct_child(c);
       
   326 				end
       
   327 			else
       
   328 				unparse(propschema, v, name, namespace, out, root);
       
   329 			end
       
   330 		else
       
   331 			out:text_tag(name, toxmlstring(proptype, v), propattr)
       
   332 		end
       
   333 	end
       
   334 end
       
   335 
       
   336 function unparse ( schema : json_schema_object, t : table, current_name : string, current_ns : string, ctx : st.stanza_t, root : json_schema_object ) : st.stanza_t
       
   337 
       
   338 	if root == nil then root = schema end
       
   339 
       
   340 	if schema.xml then
       
   341 		if schema.xml.name then
       
   342 			current_name = schema.xml.name
       
   343 		end
       
   344 		if schema.xml.namespace then
       
   345 			current_ns = schema.xml.namespace
       
   346 		end
       
   347 		-- TODO prefix?
       
   348 	end
       
   349 
       
   350 	local out = ctx or st.stanza(current_name, { xmlns = current_ns })
       
   351 
       
   352 	local s_type = guess_schema_type(schema)
       
   353 	if s_type == "object" then
       
   354 
       
   355 		for prop, propschema in pairs(schema.properties) do
       
   356 			propschema = resolve_schema(propschema, root)
       
   357 			local v = t[prop]
       
   358 
       
   359 			if v ~= nil then
       
   360 				local proptype, value_where, name, namespace, prefix, single_attribute = unpack_propschema(propschema, prop, current_ns)
       
   361 				unparse_property(out, v, proptype, propschema, value_where, name, namespace, current_ns, prefix, single_attribute, root)
       
   362 			end
       
   363 		end
       
   364 		return out;
       
   365 
       
   366 	elseif s_type == "array" then
       
   367 		local itemschema = resolve_schema(schema.items, root)
       
   368 		local proptype, value_where, name, namespace, prefix, single_attribute = unpack_propschema(itemschema, current_name, current_ns)
       
   369 		for _, item in ipairs(t as { string }) do
       
   370 			unparse_property(out, item, proptype, itemschema, value_where, name, namespace, current_ns, prefix, single_attribute, root)
       
   371 		end
       
   372 		return out;
       
   373 	end
       
   374 end
       
   375 
       
   376 return {
       
   377 	parse = parse,
       
   378 	unparse = unparse,
       
   379 }