From e700edc50f3bd7f05d45bb4410396178811f3561 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Fri, 8 Jul 2022 14:38:23 +0200 Subject: util.jsonschema: Fix validation to not assume presence of "type" field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MattJ reported a curious issue where validation did not work as expected. Primarily that the "type" field was expected to be mandatory, and thus leaving it out would result in no checks being performed. This was likely caused by misreading during initial development. Spent some time testing against https://github.com/json-schema-org/JSON-Schema-Test-Suite.git and discovered a multitude of issues, far too many to bother splitting into separate commits. More than half of the tests fail. Many because of features not implemented, which have been marked NYI. For example, some require deep comparisons e.g. when objects or arrays are present in enum fields. Some because of quirks with how Lua differs from JavaScript, e.g. no distinct array or object types. Tests involving fractional floating point numbers. We're definitely not going to follow references to remote resources. Or deal with UTF-16 silliness. One test asserted that 1.0 is an integer, where Lua 5.3+ will disagree. 
--- teal-src/util/jsonschema.tl | 299 ++++++++++++++++++++++---------------------- 1 file changed, 152 insertions(+), 147 deletions(-) (limited to 'teal-src') diff --git a/teal-src/util/jsonschema.tl b/teal-src/util/jsonschema.tl index 5c5fd4ce..160c164c 100644 --- a/teal-src/util/jsonschema.tl +++ b/teal-src/util/jsonschema.tl @@ -16,13 +16,13 @@ local pointer = require "util.jsonpointer" local type json_type_name = json.json_type_name -- json_type_name here is non-standard -local type schema_t = boolean | json_type_name | json_schema_object +local type schema_t = boolean | json_schema_object local record json_schema_object type json_type_name = json.json_type_name type schema_object = json_schema_object - type : json_type_name + type : json_type_name | { json_type_name } enum : { any } const : any @@ -47,7 +47,7 @@ local record json_schema_object -- strings maxLength : integer minLength : integer - pattern : string + pattern : string -- NYI format : string -- arrays @@ -57,17 +57,17 @@ local record json_schema_object maxItems : integer minItems : integer uniqueItems : boolean - maxContains : integer - minContains : integer + maxContains : integer -- NYI + minContains : integer -- NYI -- objects properties : { string : schema_t } - maxProperties : integer - minProperties : integer + maxProperties : integer -- NYI + minProperties : integer -- NYI required : { string } dependentRequired : { string : { string } } additionalProperties: schema_t - patternProperties: schema_t + patternProperties: schema_t -- NYI propertyNames : schema_t -- xml @@ -99,10 +99,10 @@ end -- TODO validator function per schema property -local type_validators : { json_type_name : function (schema_t, any, json_schema_object) : boolean } = {} - -local function simple_validate(schema : json_type_name, data : any) : boolean - if schema == "object" and data is table then +local function simple_validate(schema : json_type_name | { json_type_name }, data : any) : boolean + if schema == nil then + 
return true + elseif schema == "object" and data is table then return type(data) == "table" and (next(data)==nil or type((next(data, nil))) == "string") elseif schema == "array" and data is table then return type(data) == "table" and (next(data)==nil or type((next(data, nil))) == "number") @@ -110,144 +110,171 @@ local function simple_validate(schema : json_type_name, data : any) : boolean return math.type(data) == schema elseif schema == "null" then return data == null + elseif schema is { json_type_name } then + for _, one in ipairs(schema as { json_type_name }) do + if simple_validate(one, data) then + return true + end + end + return false else return type(data) == schema end end -type_validators.string = function (schema : json_schema_object, data : any) : boolean - -- XXX this is measured in byte, while JSON measures in ... bork - -- TODO use utf8.len? - if data is string then - if schema.maxLength and #data > schema.maxLength then - return false - end - if schema.minLength and #data < schema.minLength then - return false - end - return true +local complex_validate : function ( json_schema_object, any, json_schema_object ) : boolean + +local function validate (schema : schema_t, data : any, root : json_schema_object) : boolean + if schema is boolean then + return schema + else + return complex_validate(schema, data, root) end - return false end -type_validators.number = function (schema : json_schema_object, data : number) : boolean - if schema.multipleOf and data % schema.multipleOf ~= 0 then - return false - end +function complex_validate (schema : json_schema_object, data : any, root : json_schema_object) : boolean - if schema.maximum and not ( data <= schema.maximum ) then - return false + if root == nil then + root = schema end - if schema.exclusiveMaximum and not ( data < schema.exclusiveMaximum ) then - return false + if schema["$ref"] and schema["$ref"]:sub(1,1) == "#" then + local referenced = pointer.resolve(root as table, schema["$ref"]:sub(2)) as 
schema_t + if referenced ~= nil and referenced ~= root and referenced ~= schema then + if not validate(referenced, data, root) then + return false; + end + end end - if schema.minimum and not ( data >= schema.minimum ) then - return false + if not simple_validate(schema.type, data) then + return false; end - if schema.exclusiveMinimum and not ( data > schema.exclusiveMinimum ) then - return false + if schema.type == "object" then + if data is table then + -- just check that there the keys are all strings + for k in pairs(data) do + if not k is string then + return false + end + end + end end - return true -end - -type_validators.integer = type_validators.number + if schema.type == "array" then + if data is table then + -- just check that there the keys are all numbers + for i in pairs(data) do + if not i is integer then + return false + end + end + end + end -local function validate(schema : schema_t, data : any, root : json_schema_object) : boolean - if schema is boolean then - return schema + if schema["enum"] ~= nil then + local match = false + for _, v in ipairs(schema["enum"]) do + if v == data then + -- FIXME supposed to do deep-compare + match = true + break + end + end + if not match then + return false + end end - if schema is json_type_name then - return simple_validate(schema, data) + + -- XXX this is measured in byte, while JSON measures in ... bork + -- TODO use utf8.len? 
+ if data is string then + if schema.maxLength and #data > schema.maxLength then + return false + end + if schema.minLength and #data < schema.minLength then + return false + end end - if schema is json_schema_object then - if root == nil then - root = schema + + if data is number then + if schema.multipleOf and (data == 0 or data % schema.multipleOf ~= 0) then + return false end - if schema["$ref"] and schema["$ref"]:sub(1,1) == "#" then - local referenced = pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t - if referenced ~= nil then - return validate(referenced, data, root); - end + + if schema.maximum and not ( data <= schema.maximum ) then + return false end - if schema.allOf then - for _, sub in ipairs(schema.allOf) do - if not validate(sub, data, root) then - return false - end - end - return true + if schema.exclusiveMaximum and not ( data < schema.exclusiveMaximum ) then + return false end - if schema.oneOf then - local valid = 0 - for _, sub in ipairs(schema.oneOf) do - if validate(sub, data, root) then - valid = valid + 1 - end - end - return valid == 1 + if schema.minimum and not ( data >= schema.minimum ) then + return false end - if schema.anyOf then - for _, sub in ipairs(schema.anyOf) do - if validate(sub, data, root) then - return true - end - end + if schema.exclusiveMinimum and not ( data > schema.exclusiveMinimum ) then return false end + end - if schema["not"] then - if validate(schema["not"], data, root) then + if schema.allOf then + for _, sub in ipairs(schema.allOf) do + if not validate(sub, data, root) then return false end end + end - if schema["if"] then - if validate(schema["if"], data, root) then - if schema["then"] then - return validate(schema["then"], data, root) - end - else - if schema["else"] then - return validate(schema["else"], data, root) - end + if schema.oneOf then + local valid = 0 + for _, sub in ipairs(schema.oneOf) do + if validate(sub, data, root) then + valid = valid + 1 end end - - if schema.const ~= 
nil and schema.const ~= data then + if valid ~= 1 then return false end + end - if schema["enum"] ~= nil then - for _, v in ipairs(schema["enum"]) do - if v == data then - return true - end + if schema.anyOf then + local match = false + for _, sub in ipairs(schema.anyOf) do + if validate(sub, data, root) then + match = true + break end + end + if not match then return false end + end - if schema.type then - if not simple_validate(schema.type, data) then - return false - end + if schema["not"] then + if validate(schema["not"], data, root) then + return false + end + end - local validator = type_validators[schema.type] - if validator then - return validator(schema, data, root) + if schema["if"] ~= nil then + if validate(schema["if"], data, root) then + if schema["then"] then + return validate(schema["then"], data, root) + end + else + if schema["else"] then + return validate(schema["else"], data, root) end end - return true end -end -type_validators.table = function (schema : json_schema_object, data : any, root : json_schema_object) : boolean + if schema.const ~= nil and schema.const ~= data then + return false + end + if data is table then if schema.maxItems and #data > schema.maxItems then @@ -266,24 +293,28 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.properties then - local additional : schema_t = schema.additionalProperties or true - for k, v in pairs(data) do - if schema.propertyNames and not validate(schema.propertyNames, k, root) then + if schema.propertyNames ~= nil then + for k in pairs(data) do + if not validate(schema.propertyNames, k, root) then return false end - local s = schema.properties[k as string] or additional - if not validate(s, v, root) then + end + end + + if schema.properties then + for k, sub in pairs(schema.properties) do + if data[k] ~= nil and not validate(sub, data[k], root) then return false end end - elseif schema.additionalProperties then + end + + if 
schema.additionalProperties ~= nil then for k, v in pairs(data) do - if schema.propertyNames and not validate(schema.propertyNames, k, root) then - return false - end - if not validate(schema.additionalProperties, v, root) then - return false + if schema.properties == nil or schema.properties[k as string] == nil then + if not validate(schema.additionalProperties, v, root) then + return false + end end end end @@ -300,9 +331,11 @@ type_validators.table = function (schema : json_schema_object, data : any, root end local p = 0 - if schema.prefixItems then + if schema.prefixItems ~= nil then for i, s in ipairs(schema.prefixItems) do - if validate(s, data[i], root) then + if data[i] == nil then + break + elseif validate(s, data[i], root) then p = i else return false @@ -310,7 +343,7 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.items then + if schema.items ~= nil then for i = p+1, #data do if not validate(schema.items, data[i], root) then return false @@ -318,7 +351,7 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.contains then + if schema.contains ~= nil then local found = false for i = 1, #data do if validate(schema.contains, data[i], root) then @@ -330,39 +363,11 @@ type_validators.table = function (schema : json_schema_object, data : any, root return false end end - - return true end - return false -end -type_validators.object = function (schema : schema_t, data : any, root : json_schema_object) : boolean - if data is table then - for k in pairs(data) do - if not k is string then - return false - end - end - - return type_validators.table(schema, data, root) - end - return false + return true; end -type_validators.array = function (schema : schema_t, data : any, root : json_schema_object) : boolean - if data is table then - - -- just check that there the keys are all numbers - for i in pairs(data) do - if not i is number then - return false - end - end - 
- return type_validators.table(schema, data, root) - end - return false -end json_schema_object.validate = validate; -- cgit v1.2.3 From 89359b70dc0446cdda15da19173f460504bafc3d Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Fri, 8 Jul 2022 17:32:48 +0200 Subject: util.datamapper: Improve handling of schemas with non-obvious "type" The JSON Schema specification says that schemas are objects or booleans, and that the 'type' property is optional and can be an array. This module previously allowed bare type names as schemas and did not really handle booleans. It now handles missing 'type' properties and boolean 'true' as a schema. Objects and arrays are guessed based on the presence of 'properties' or 'items' field. --- teal-src/util/datamapper.tl | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'teal-src') diff --git a/teal-src/util/datamapper.tl b/teal-src/util/datamapper.tl index 78ca3035..73b1dfc0 100644 --- a/teal-src/util/datamapper.tl +++ b/teal-src/util/datamapper.tl @@ -25,7 +25,7 @@ local pointer = require"util.jsonpointer"; local json_type_name = json.json_type_name; local json_schema_object = require "util.jsonschema" -local type schema_t = boolean | json_type_name | json_schema_object +local type schema_t = boolean | json_schema_object local function toboolean ( s : string ) : boolean if s == "true" or s == "1" then @@ -59,15 +59,28 @@ local enum value_goes end local function resolve_schema(schema : schema_t, root : json_schema_object) : schema_t - if schema is json_schema_object and schema["$ref"] and schema["$ref"]:sub(1, 1) == "#" then - local referenced = pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t; - if referenced ~= nil then - return referenced + if schema is json_schema_object then + if schema["$ref"] and schema["$ref"]:sub(1, 1) == "#" then + return pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t; end end return schema; end +local function 
guess_schema_type(schema : json_schema_object) : json_type_name + local schema_types = schema.type + if schema_types is json_type_name then + return schema_types + elseif schema_types ~= nil then + error "schema has unsupported 'type' property" + elseif schema.properties then + return "object" + elseif schema.items then + return "array" + end + return "string" -- default assumption +end + local function unpack_propschema( propschema : schema_t, propname : string, current_ns : string ) : json_type_name, value_goes, string, string, string, string, { any } local proptype : json_type_name = "string" @@ -79,9 +92,9 @@ local function unpack_propschema( propschema : schema_t, propname : string, curr local enums : { any } if propschema is json_schema_object then - proptype = propschema.type - elseif propschema is json_type_name then - proptype = propschema + proptype = guess_schema_type(propschema); + elseif propschema is string then -- Teal says this can never be a string, but it could before so best be sure + error("schema as string is not supported: "..propschema.." 
{"..current_ns.."}"..propname) end if proptype == "object" or proptype == "array" then @@ -120,6 +133,10 @@ local function unpack_propschema( propschema : schema_t, propname : string, curr end end + if current_ns == "urn:xmpp:reactions:0" and name == "reactions" then + assert(proptype=="array") + end + return proptype, value_where, name, namespace, prefix, single_attribute, enums end @@ -239,9 +256,10 @@ function parse_array (schema : json_schema_object, s : st.stanza_t, root : json_ end local function parse (schema : json_schema_object, s : st.stanza_t) : table - if schema.type == "object" then + local s_type = guess_schema_type(schema) + if s_type == "object" then return parse_object(schema, s, schema) - elseif schema.type == "array" then + elseif s_type == "array" then return parse_array(schema, s, schema) else error "top-level scalars unsupported" @@ -333,7 +351,8 @@ function unparse ( schema : json_schema_object, t : table, current_name : string local out = ctx or st.stanza(current_name, { xmlns = current_ns }) - if schema.type == "object" then + local s_type = guess_schema_type(schema) + if s_type == "object" then for prop, propschema in pairs(schema.properties) do propschema = resolve_schema(propschema, root) @@ -346,7 +365,7 @@ function unparse ( schema : json_schema_object, t : table, current_name : string end return out; - elseif schema.type == "array" then + elseif s_type == "array" then local itemschema = resolve_schema(schema.items, root) local proptype, value_where, name, namespace, prefix, single_attribute = unpack_propschema(itemschema, current_name, current_ns) for _, item in ipairs(t as { string }) do -- cgit v1.2.3