From e700edc50f3bd7f05d45bb4410396178811f3561 Mon Sep 17 00:00:00 2001 From: Kim Alvefur Date: Fri, 8 Jul 2022 14:38:23 +0200 Subject: util.jsonschema: Fix validation to not assume presence of "type" field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MattJ reported a curious issue where validation did not work as expected. Primarily, the "type" field was expected to be mandatory, and thus leaving it out would result in no checks being performed. This was likely caused by misreading during initial development. Spent some time testing against https://github.com/json-schema-org/JSON-Schema-Test-Suite.git and discovered a multitude of issues, far too many to bother splitting into separate commits. More than half of the tests fail. Many fail because of features not implemented, which have been marked NYI. For example, some require deep comparisons, e.g. when objects or arrays are present in enum fields. Some fail because of quirks with how Lua differs from JavaScript, e.g. no distinct array or object types. Others involve fractional floating point numbers. We're definitely not going to follow references to remote resources, or deal with UTF-16 silliness. One test asserted that 1.0 is an integer, where Lua 5.3+ will disagree. 
--- spec/util_jsonschema_spec.lua | 102 ++++++++++++++ teal-src/util/jsonschema.tl | 299 +++++++++++++++++++++--------------------- util/jsonschema.lua | 276 +++++++++++++++++++------------------- 3 files changed, 395 insertions(+), 282 deletions(-) create mode 100644 spec/util_jsonschema_spec.lua diff --git a/spec/util_jsonschema_spec.lua b/spec/util_jsonschema_spec.lua new file mode 100644 index 00000000..74da2c07 --- /dev/null +++ b/spec/util_jsonschema_spec.lua @@ -0,0 +1,102 @@ +local js = require "util.jsonschema"; +local json = require "util.json"; +local lfs = require "lfs"; + +-- https://github.com/json-schema-org/JSON-Schema-Test-Suite.git 2.0.0-550-g88d6948 +local test_suite_dir = "spec/JSON-Schema-Test-Suite/tests/draft2020-12" +if lfs.attributes(test_suite_dir, "mode") ~= "directory" then return end + +-- Tests to skip and short reason why (NYI = not yet implemented) +local skip = { + ["ref.json:0:3"] = "NYI additionalProperties"; + ["ref.json:3:2"] = "FIXME investigate, util.jsonpath issue?", + ["ref.json:6:1"] = "NYI", + ["required.json:0:2"] = "distinguishing objects from arrays", + ["additionalProperties.json:0:2"] = "distinguishing objects from arrays", + ["additionalProperties.json:0:5"] = "NYI", + ["additionalProperties.json:1:0"] = "NYI", + ["anchor.json"] = "$anchor NYI", + ["const.json:1"] = "deepcompare", + ["const.json:13:2"] = "IEEE 754 equality", + ["const.json:2"] = "deepcompare", + ["const.json:8"] = "deepcompare", + ["const.json:9"] = "deepcompare", + ["contains.json:0:5"] = "distinguishing objects from arrays", + ["defs.json"] = "need built-in meta-schema", + ["dependentRequired.json"] = "NYI", + ["dependentSchemas.json"] = "NYI", + ["dynamicRef.json"] = "NYI", + ["enum.json:1:3"] = "deepcompare", + ["id.json"] = "NYI", + ["maxContains.json"] = "NYI", + ["maxLength.json:0:4"] = "UTF-16", + ["maxProperties.json"] = "NYI", + ["minContains.json"] = "NYI", + ["minLength.json:0:4"] = "UTF-16", + ["minProperties.json"] = "NYI", + 
["multipleOf.json:1"] = "multiples of IEEE 754 fractions", + ["multipleOf.json:2"] = "multiples of IEEE 754 fractions", + ["pattern.json"] = "NYI", + ["patternProperties.json"] = "NYI", + ["properties.json:1:2"] = "NYI", + ["properties.json:1:3"] = "NYI", + ["ref.json:14"] = "NYI", + ["ref.json:15"] = "NYI", + ["ref.json:16"] = "NYI", + ["ref.json:17"] = "NYI", + ["ref.json:18"] = "NYI", + ["ref.json:13"] = "NYI", + ["ref.json:19"] = "NYI", + ["ref.json:11"] = "NYI", + ["ref.json:12:1"] = "FIXME", + ["refRemote.json"] = "DEFINITELY NYI", + ["type.json:3:4"] = "distinguishing objects from arrays", + ["type.json:3:6"] = "null is weird", + ["type.json:4:3"] = "distinguishing objects from arrays", + ["type.json:4:6"] = "null is weird", + ["type.json:9:4"] = "null is weird", + ["type.json:9:6"] = "null is weird", + ["unevaluatedItems.json"] = "NYI", + ["unevaluatedProperties.json"] = "NYI", + ["uniqueItems.json:0:11"] = "deepcompare", + ["uniqueItems.json:0:13"] = "deepcompare", + ["uniqueItems.json:0:14"] = "deepcompare", + ["uniqueItems.json:0:22"] = "deepcompare", + ["uniqueItems.json:0:24"] = "deepcompare", + ["uniqueItems.json:0:9"] = "deepcompare", + ["unknownKeyword.json"] = "NYI", + ["vocabulary.json"] = "NYI", +}; + +local function label(s, i) + return string.format("%s:%d", s, i-1); +end + +describe("util.jsonschema.validate", function() + for test_case_file in lfs.dir(test_suite_dir) do + -- print(skip[test_case_file] and "do " or "skip", test_case_file) + if test_case_file:sub(-5) == ".json" and not skip[test_case_file] then + describe(test_case_file, function() + local test_cases; + setup(function() + local f = assert(io.open(test_suite_dir .. "/" .. test_case_file)); + local rawdata = assert(f:read("*a"), "failed to read " .. test_case_file) + test_cases = assert(json.decode(rawdata), "failed to parse " .. 
test_case_file) + end) + describe("tests", function() + for i, schema_test in ipairs(test_cases) do + local generic_label = label(test_case_file, i); + describe(schema_test.description or generic_label, function() + for j, test in ipairs(schema_test.tests) do + local specific_label = label(generic_label, j); + ((skip[generic_label] or skip[specific_label]) and pending or it)(test.description, function() + assert.equal(test.valid, js.validate(schema_test.schema, test.data), specific_label .. " " .. test.description); + end) + end + end) + end + end) + end) + end + end +end); diff --git a/teal-src/util/jsonschema.tl b/teal-src/util/jsonschema.tl index 5c5fd4ce..160c164c 100644 --- a/teal-src/util/jsonschema.tl +++ b/teal-src/util/jsonschema.tl @@ -16,13 +16,13 @@ local pointer = require "util.jsonpointer" local type json_type_name = json.json_type_name -- json_type_name here is non-standard -local type schema_t = boolean | json_type_name | json_schema_object +local type schema_t = boolean | json_schema_object local record json_schema_object type json_type_name = json.json_type_name type schema_object = json_schema_object - type : json_type_name + type : json_type_name | { json_type_name } enum : { any } const : any @@ -47,7 +47,7 @@ local record json_schema_object -- strings maxLength : integer minLength : integer - pattern : string + pattern : string -- NYI format : string -- arrays @@ -57,17 +57,17 @@ local record json_schema_object maxItems : integer minItems : integer uniqueItems : boolean - maxContains : integer - minContains : integer + maxContains : integer -- NYI + minContains : integer -- NYI -- objects properties : { string : schema_t } - maxProperties : integer - minProperties : integer + maxProperties : integer -- NYI + minProperties : integer -- NYI required : { string } dependentRequired : { string : { string } } additionalProperties: schema_t - patternProperties: schema_t + patternProperties: schema_t -- NYI propertyNames : schema_t -- xml @@ -99,10 
+99,10 @@ end -- TODO validator function per schema property -local type_validators : { json_type_name : function (schema_t, any, json_schema_object) : boolean } = {} - -local function simple_validate(schema : json_type_name, data : any) : boolean - if schema == "object" and data is table then +local function simple_validate(schema : json_type_name | { json_type_name }, data : any) : boolean + if schema == nil then + return true + elseif schema == "object" and data is table then return type(data) == "table" and (next(data)==nil or type((next(data, nil))) == "string") elseif schema == "array" and data is table then return type(data) == "table" and (next(data)==nil or type((next(data, nil))) == "number") @@ -110,144 +110,171 @@ local function simple_validate(schema : json_type_name, data : any) : boolean return math.type(data) == schema elseif schema == "null" then return data == null + elseif schema is { json_type_name } then + for _, one in ipairs(schema as { json_type_name }) do + if simple_validate(one, data) then + return true + end + end + return false else return type(data) == schema end end -type_validators.string = function (schema : json_schema_object, data : any) : boolean - -- XXX this is measured in byte, while JSON measures in ... bork - -- TODO use utf8.len? 
 - if data is string then - if schema.maxLength and #data > schema.maxLength then - return false - end - if schema.minLength and #data < schema.minLength then - return false - end - return true +local complex_validate : function ( json_schema_object, any, json_schema_object ) : boolean + +local function validate (schema : schema_t, data : any, root : json_schema_object) : boolean + if schema is boolean then + return schema + else + return complex_validate(schema, data, root) end - return false end -type_validators.number = function (schema : json_schema_object, data : number) : boolean - if schema.multipleOf and data % schema.multipleOf ~= 0 then - return false - end +function complex_validate (schema : json_schema_object, data : any, root : json_schema_object) : boolean - if schema.maximum and not ( data <= schema.maximum ) then - return false + if root == nil then + root = schema end - if schema.exclusiveMaximum and not ( data < schema.exclusiveMaximum ) then - return false + if schema["$ref"] and schema["$ref"]:sub(1,1) == "#" then + local referenced = pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t + if referenced ~= nil and referenced ~= root and referenced ~= schema then + if not validate(referenced, data, root) then + return false; + end + end end - if schema.minimum and not ( data >= schema.minimum ) then - return false + if not simple_validate(schema.type, data) then + return false; end - if schema.exclusiveMinimum and not ( data > schema.exclusiveMinimum ) then - return false + if schema.type == "object" then + if data is table then + -- just check that the keys are all strings + for k in pairs(data) do + if not k is string then + return false + end + end + end end - return true -end - -type_validators.integer = type_validators.number + if schema.type == "array" then + if data is table then + -- just check that the keys are all numbers + for i in pairs(data) do + if not i is integer then + return false + end + end + end + end 
-local function validate(schema : schema_t, data : any, root : json_schema_object) : boolean - if schema is boolean then - return schema + if schema["enum"] ~= nil then + local match = false + for _, v in ipairs(schema["enum"]) do + if v == data then + -- FIXME supposed to do deep-compare + match = true + break + end + end + if not match then + return false + end end - if schema is json_type_name then - return simple_validate(schema, data) + + -- XXX this is measured in byte, while JSON measures in ... bork + -- TODO use utf8.len? + if data is string then + if schema.maxLength and #data > schema.maxLength then + return false + end + if schema.minLength and #data < schema.minLength then + return false + end end - if schema is json_schema_object then - if root == nil then - root = schema + + if data is number then + if schema.multipleOf and (data == 0 or data % schema.multipleOf ~= 0) then + return false end - if schema["$ref"] and schema["$ref"]:sub(1,1) == "#" then - local referenced = pointer.resolve(root as table, schema["$ref"]:sub(2)) as schema_t - if referenced ~= nil then - return validate(referenced, data, root); - end + + if schema.maximum and not ( data <= schema.maximum ) then + return false end - if schema.allOf then - for _, sub in ipairs(schema.allOf) do - if not validate(sub, data, root) then - return false - end - end - return true + if schema.exclusiveMaximum and not ( data < schema.exclusiveMaximum ) then + return false end - if schema.oneOf then - local valid = 0 - for _, sub in ipairs(schema.oneOf) do - if validate(sub, data, root) then - valid = valid + 1 - end - end - return valid == 1 + if schema.minimum and not ( data >= schema.minimum ) then + return false end - if schema.anyOf then - for _, sub in ipairs(schema.anyOf) do - if validate(sub, data, root) then - return true - end - end + if schema.exclusiveMinimum and not ( data > schema.exclusiveMinimum ) then return false end + end - if schema["not"] then - if validate(schema["not"], data, 
root) then + if schema.allOf then + for _, sub in ipairs(schema.allOf) do + if not validate(sub, data, root) then return false end end + end - if schema["if"] then - if validate(schema["if"], data, root) then - if schema["then"] then - return validate(schema["then"], data, root) - end - else - if schema["else"] then - return validate(schema["else"], data, root) - end + if schema.oneOf then + local valid = 0 + for _, sub in ipairs(schema.oneOf) do + if validate(sub, data, root) then + valid = valid + 1 end end - - if schema.const ~= nil and schema.const ~= data then + if valid ~= 1 then return false end + end - if schema["enum"] ~= nil then - for _, v in ipairs(schema["enum"]) do - if v == data then - return true - end + if schema.anyOf then + local match = false + for _, sub in ipairs(schema.anyOf) do + if validate(sub, data, root) then + match = true + break end + end + if not match then return false end + end - if schema.type then - if not simple_validate(schema.type, data) then - return false - end + if schema["not"] then + if validate(schema["not"], data, root) then + return false + end + end - local validator = type_validators[schema.type] - if validator then - return validator(schema, data, root) + if schema["if"] ~= nil then + if validate(schema["if"], data, root) then + if schema["then"] then + return validate(schema["then"], data, root) + end + else + if schema["else"] then + return validate(schema["else"], data, root) end end - return true end -end -type_validators.table = function (schema : json_schema_object, data : any, root : json_schema_object) : boolean + if schema.const ~= nil and schema.const ~= data then + return false + end + if data is table then if schema.maxItems and #data > schema.maxItems then @@ -266,24 +293,28 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.properties then - local additional : schema_t = schema.additionalProperties or true - for k, v in pairs(data) do - if 
schema.propertyNames and not validate(schema.propertyNames, k, root) then + if schema.propertyNames ~= nil then + for k in pairs(data) do + if not validate(schema.propertyNames, k, root) then return false end - local s = schema.properties[k as string] or additional - if not validate(s, v, root) then + end + end + + if schema.properties then + for k, sub in pairs(schema.properties) do + if data[k] ~= nil and not validate(sub, data[k], root) then return false end end - elseif schema.additionalProperties then + end + + if schema.additionalProperties ~= nil then for k, v in pairs(data) do - if schema.propertyNames and not validate(schema.propertyNames, k, root) then - return false - end - if not validate(schema.additionalProperties, v, root) then - return false + if schema.properties == nil or schema.properties[k as string] == nil then + if not validate(schema.additionalProperties, v, root) then + return false + end end end end @@ -300,9 +331,11 @@ type_validators.table = function (schema : json_schema_object, data : any, root end local p = 0 - if schema.prefixItems then + if schema.prefixItems ~= nil then for i, s in ipairs(schema.prefixItems) do - if validate(s, data[i], root) then + if data[i] == nil then + break + elseif validate(s, data[i], root) then p = i else return false @@ -310,7 +343,7 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.items then + if schema.items ~= nil then for i = p+1, #data do if not validate(schema.items, data[i], root) then return false @@ -318,7 +351,7 @@ type_validators.table = function (schema : json_schema_object, data : any, root end end - if schema.contains then + if schema.contains ~= nil then local found = false for i = 1, #data do if validate(schema.contains, data[i], root) then @@ -330,39 +363,11 @@ type_validators.table = function (schema : json_schema_object, data : any, root return false end end - - return true end - return false -end -type_validators.object = function 
 (schema : schema_t, data : any, root : json_schema_object) : boolean - if data is table then - for k in pairs(data) do - if not k is string then - return false - end - end - - return type_validators.table(schema, data, root) - end - return false + return true; end -type_validators.array = function (schema : schema_t, data : any, root : json_schema_object) : boolean - if data is table then - - -- just check that there the keys are all numbers - for i in pairs(data) do - if not i is number then - return false - end - end - - return type_validators.table(schema, data, root) - end - return false -end json_schema_object.validate = validate; diff --git a/util/jsonschema.lua b/util/jsonschema.lua index 8286fa19..48a3351f 100644 --- a/util/jsonschema.lua +++ b/util/jsonschema.lua @@ -1,3 +1,5 @@ +-- This file is generated from teal-src/util/jsonschema.tl + local m_type = math.type or function (n) return n % 1 == 0 and n <= 9007199254740992 and n >= -9007199254740992 and "integer" or "float"; end; @@ -10,12 +12,12 @@ local json_type_name = json.json_type_name local schema_t = {} -local json_schema_object = {xml_t = {}} - -local type_validators = {} +local json_schema_object = { xml_t = {} } local function simple_validate(schema, data) - if schema == "object" and type(data) == "table" then + if schema == nil then + return true + elseif schema == "object" and type(data) == "table" then return type(data) == "table" and (next(data) == nil or type((next(data, nil))) == "string") elseif schema == "array" and type(data) == "table" then return type(data) == "table" and (next(data) == nil or type((next(data, nil))) == "number") @@ -23,143 +25,169 @@ local function simple_validate(schema, data) return m_type(data) == schema elseif schema == "null" then return data == null + elseif type(schema) == "table" then + for _, one in ipairs(schema) do + if simple_validate(one, data) then + return true + end + end + return false else return type(data) == schema end end -type_validators.string 
= function(schema, data) +local complex_validate - if type(data) == "string" then - if schema.maxLength and #data > schema.maxLength then - return false - end - if schema.minLength and #data < schema.minLength then - return false - end - return true +local function validate(schema, data, root) + if type(schema) == "boolean" then + return schema + else + return complex_validate(schema, data, root) end - return false end -type_validators.number = function(schema, data) - if schema.multipleOf and data % schema.multipleOf ~= 0 then - return false - end +function complex_validate(schema, data, root) - if schema.maximum and not (data <= schema.maximum) then - return false + if root == nil then + root = schema end - if schema.exclusiveMaximum and not (data < schema.exclusiveMaximum) then - return false + if schema["$ref"] and schema["$ref"]:sub(1, 1) == "#" then + local referenced = pointer.resolve(root, schema["$ref"]:sub(2)) + if referenced ~= nil and referenced ~= root and referenced ~= schema then + if not validate(referenced, data, root) then + return false + end + end end - if schema.minimum and not (data >= schema.minimum) then + if not simple_validate(schema.type, data) then return false end - if schema.exclusiveMinimum and not (data > schema.exclusiveMinimum) then - return false + if schema.type == "object" then + if type(data) == "table" then + + for k in pairs(data) do + if not (type(k) == "string") then + return false + end + end + end end - return true -end + if schema.type == "array" then + if type(data) == "table" then -type_validators.integer = type_validators.number + for i in pairs(data) do + if not (math.type(i) == "integer") then + return false + end + end + end + end -local function validate(schema, data, root) - if type(schema) == "boolean" then - return schema + if schema["enum"] ~= nil then + local match = false + for _, v in ipairs(schema["enum"]) do + if v == data then + + match = true + break + end + end + if not match then + return false + end 
end - if type(schema) == "string" then - return simple_validate(schema, data) + + if type(data) == "string" then + if schema.maxLength and #data > schema.maxLength then + return false + end + if schema.minLength and #data < schema.minLength then + return false + end end - if type(schema) == "table" then - if root == nil then - root = schema + + if type(data) == "number" then + if schema.multipleOf and (data == 0 or data % schema.multipleOf ~= 0) then + return false end - if schema["$ref"] and schema["$ref"]:sub(1, 1) == "#" then - local referenced = pointer.resolve(root, schema["$ref"]:sub(2)) - if referenced ~= nil then - return validate(referenced, data, root) - end + + if schema.maximum and not (data <= schema.maximum) then + return false end - if schema.allOf then - for _, sub in ipairs(schema.allOf) do - if not validate(sub, data, root) then - return false - end - end - return true + if schema.exclusiveMaximum and not (data < schema.exclusiveMaximum) then + return false end - if schema.oneOf then - local valid = 0 - for _, sub in ipairs(schema.oneOf) do - if validate(sub, data, root) then - valid = valid + 1 - end - end - return valid == 1 + if schema.minimum and not (data >= schema.minimum) then + return false end - if schema.anyOf then - for _, sub in ipairs(schema.anyOf) do - if validate(sub, data, root) then - return true - end - end + if schema.exclusiveMinimum and not (data > schema.exclusiveMinimum) then return false end + end - if schema["not"] then - if validate(schema["not"], data, root) then + if schema.allOf then + for _, sub in ipairs(schema.allOf) do + if not validate(sub, data, root) then return false end end + end - if schema["if"] then - if validate(schema["if"], data, root) then - if schema["then"] then - return validate(schema["then"], data, root) - end - else - if schema["else"] then - return validate(schema["else"], data, root) - end + if schema.oneOf then + local valid = 0 + for _, sub in ipairs(schema.oneOf) do + if validate(sub, data, 
root) then + valid = valid + 1 end end - - if schema.const ~= nil and schema.const ~= data then + if valid ~= 1 then return false end + end - if schema["enum"] ~= nil then - for _, v in ipairs(schema["enum"]) do - if v == data then - return true - end + if schema.anyOf then + local match = false + for _, sub in ipairs(schema.anyOf) do + if validate(sub, data, root) then + match = true + break end + end + if not match then return false end + end - if schema.type then - if not simple_validate(schema.type, data) then - return false - end + if schema["not"] then + if validate(schema["not"], data, root) then + return false + end + end - local validator = type_validators[schema.type] - if validator then - return validator(schema, data, root) + if schema["if"] ~= nil then + if validate(schema["if"], data, root) then + if schema["then"] then + return validate(schema["then"], data, root) + end + else + if schema["else"] then + return validate(schema["else"], data, root) end end - return true end -end -type_validators.table = function(schema, data, root) + if schema.const ~= nil and schema.const ~= data then + return false + end + if type(data) == "table" then if schema.maxItems and #data > schema.maxItems then @@ -178,24 +206,28 @@ type_validators.table = function(schema, data, root) end end - if schema.properties then - local additional = schema.additionalProperties or true - for k, v in pairs(data) do - if schema.propertyNames and not validate(schema.propertyNames, k, root) then + if schema.propertyNames ~= nil then + for k in pairs(data) do + if not validate(schema.propertyNames, k, root) then return false end - local s = schema.properties[k] or additional - if not validate(s, v, root) then + end + end + + if schema.properties then + for k, sub in pairs(schema.properties) do + if data[k] ~= nil and not validate(sub, data[k], root) then return false end end - elseif schema.additionalProperties then + end + + if schema.additionalProperties ~= nil then for k, v in 
pairs(data) do - if schema.propertyNames and not validate(schema.propertyNames, k, root) then - return false - end - if not validate(schema.additionalProperties, v, root) then - return false + if schema.properties == nil or schema.properties[k] == nil then + if not validate(schema.additionalProperties, v, root) then + return false + end end end end @@ -212,9 +244,11 @@ type_validators.table = function(schema, data, root) end local p = 0 - if schema.prefixItems then + if schema.prefixItems ~= nil then for i, s in ipairs(schema.prefixItems) do - if validate(s, data[i], root) then + if data[i] == nil then + break + elseif validate(s, data[i], root) then p = i else return false @@ -222,7 +256,7 @@ type_validators.table = function(schema, data, root) end end - if schema.items then + if schema.items ~= nil then for i = p + 1, #data do if not validate(schema.items, data[i], root) then return false @@ -230,7 +264,7 @@ type_validators.table = function(schema, data, root) end end - if schema.contains then + if schema.contains ~= nil then local found = false for i = 1, #data do if validate(schema.contains, data[i], root) then @@ -242,37 +276,9 @@ type_validators.table = function(schema, data, root) return false end end - - return true end - return false -end -type_validators.object = function(schema, data, root) - if type(data) == "table" then - for k in pairs(data) do - if not (type(k) == "string") then - return false - end - end - - return type_validators.table(schema, data, root) - end - return false -end - -type_validators.array = function(schema, data, root) - if type(data) == "table" then - - for i in pairs(data) do - if not (type(i) == "number") then - return false - end - end - - return type_validators.table(schema, data, root) - end - return false + return true end json_schema_object.validate = validate; -- cgit v1.2.3