return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
}
+ /*
+ Returns the body of a grammar rule that matches a JSON string (opening quote, content,
+ closing quote and trailing `space`) that is none of the provided strings.
+ In this file it is used to build the key rule for additional properties, so that such
+ keys cannot reuse the name of an explicitly declared property.
+
+ The strings are inserted into a character trie. Each trie level emits one alternative
+ per child character, followed by a negated character class that accepts any other
+ (non-quote) character and then `char*`.
+
+ _not_strings({"a"})
+ -> ["] ( [a] char+ | [^"a] char* )? ["] space
+ _not_strings({"and", "also"})
+ -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
+
+ When the empty string is among `strings` the trailing `?` is omitted, so the string
+ content must be non-empty.
+
+ NOTE(review): characters are written into the [...] classes verbatim; names containing
+ characters that are special inside a character class (e.g. `]`, `\`, `^`, `-`) presumably
+ need escaping - confirm against the grammar parser.
+ NOTE(review): the trie is keyed by `char`, so a multi-byte UTF-8 name is split into one
+ node per byte - confirm that the grammar parser accepts the resulting classes.
+ NOTE(review): the group emitted after a character that has children is mandatory, so a
+ proper prefix of a listed string (e.g. "a" or "al" for {"and", "also"}) looks like it is
+ rejected as well even though it is not in the list - confirm whether this is intended.
+ */
+ std::string _not_strings(const std::vector<std::string> & strings) {
+
+ struct TrieNode {
+ std::map<char, TrieNode> children; // one child per next character; std::map iterates in key order
+ bool is_end_of_string; // true when one of the excluded strings ends at this node
+
+ TrieNode() : is_end_of_string(false) {}
+
+ void insert(const std::string & string) {
+ auto node = this;
+ for (char c : string) {
+ node = &node->children[c]; // operator[] creates the child node on first use
+ }
+ node->is_end_of_string = true;
+ }
+ };
+
+ TrieNode trie; // root node: represents the empty prefix
+ for (const auto & s : strings) {
+ trie.insert(s);
+ }
+
+ std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); // text used below to reference the `char` primitive
+ std::ostringstream out;
+ out << "[\"] ( "; // opening quote, then the group of permitted contents
+ std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) { // std::function so the lambda can call itself
+ std::ostringstream rejects; // child characters of this level, excluded from the fallback class below
+ auto first = true; // false once at least one alternative has been written at this level
+ for (const auto & kv : node.children) {
+ rejects << kv.first;
+ if (first) {
+ first = false;
+ } else {
+ out << " | ";
+ }
+ out << "[" << kv.first << "]";
+ if (!kv.second.children.empty()) {
+ out << " (";
+ visit(kv.second); // longer excluded strings share this prefix: recurse one level down
+ out << ")";
+ } else if (kv.second.is_end_of_string) {
+ out << " " << char_rule << "+"; // an excluded string ends here: require at least one more character
+ }
+ }
+ if (!node.children.empty()) {
+ if (!first) {
+ out << " | ";
+ }
+ out << "[^\"" << rejects.str() << "] " << char_rule << "*"; // fallback: any other non-quote character, then anything
+ }
+ };
+ visit(trie);
+
+ out << " )";
+ if (!trie.is_end_of_string) {
+ out << "?"; // empty content is allowed unless "" itself is excluded
+ }
+ out << " [\"] space";
+ return out.str();
+ }
+
std::string _resolve_ref(const std::string & ref) {
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
std::vector<std::string> required_props;
std::vector<std::string> optional_props;
std::unordered_map<std::string, std::string> prop_kv_rule_names;
+ std::vector<std::string> prop_names;
for (const auto & kv : properties) {
const auto &prop_name = kv.first;
const auto &prop_schema = kv.second;
} else {
optional_props.push_back(prop_name);
}
+ prop_names.push_back(prop_name);
}
- if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
+ if (!(additional_properties.is_boolean() && !additional_properties.get<bool>())) {
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
- std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
- std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
+ std::string value_rule =
+ additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
+ : _add_primitive("value", PRIMITIVE_RULES.at("value"));
+
+ auto key_rule =
+ prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
+ : _add_rule(sub_name + "-k", _not_strings(prop_names));
+ std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
prop_kv_rule_names["*"] = kv_rule;
optional_props.push_back("*");
}
}
std::string k = ks[0];
std::string kv_rule_name = prop_kv_rule_names[k];
- if (k == "*") {
- res = _add_rule(
- name + (name.empty() ? "" : "-") + "additional-kvs",
- kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
- );
- } else if (first_is_optional) {
- res = "( \",\" space " + kv_rule_name + " )?";
+ std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
+ if (first_is_optional) {
+ res = comma_ref + (k == "*" ? "*" : "?");
} else {
- res = kv_rule_name;
+ res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
}
if (ks.size() > 1) {
res += " " + _add_rule(
}
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
} else if (schema.contains("const")) {
- return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
+ return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
} else if (schema.contains("enum")) {
std::vector<std::string> enum_values;
for (const auto & v : schema["enum"]) {
enum_values.push_back(_generate_constant_rule(v));
}
- return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
+ return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
} else if ((schema_type.is_null() || schema_type == "object")
&& (schema.contains("properties") ||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
#! pip install pydantic
#! python json-schema-pydantic-example.py
-from pydantic import BaseModel, TypeAdapter
+from pydantic import BaseModel, Extra, TypeAdapter
from annotated_types import MinLen
from typing import Annotated, List, Optional
import json, requests
if __name__ == '__main__':
class QAPair(BaseModel):
+ class Config:
+ extra = 'forbid' # triggers additionalProperties: false in the JSON schema
question: str
concise_answer: str
justification: str
stars: Annotated[int, Field(ge=1, le=5)]
class PyramidalSummary(BaseModel):
+ class Config:
+ extra = 'forbid' # triggers additionalProperties: false in the JSON schema
title: str
summary: str
question_answers: Annotated[List[QAPair], MinLen(2)]
import json
import re
import sys
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
-
+from typing import Any, List, Optional, Set, Tuple, Union
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
return ''.join(('(', *recurse(0), ')'))
+ def _not_strings(self, strings):  # returns rule text matching a JSON string that is none of `strings`
+ class TrieNode:  # character trie over the excluded strings
+ def __init__(self):
+ self.children = {}  # next character -> TrieNode
+ self.is_end_of_string = False  # True when an excluded string ends at this node
+
+ def insert(self, string):
+ node = self
+ for c in string:
+ node = node.children.setdefault(c, TrieNode())  # reuse the existing child or create it
+ node.is_end_of_string = True
+
+ trie = TrieNode()  # root node: represents the empty prefix
+ for s in strings:
+ trie.insert(s)
+
+ char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])  # text used below to reference the `char` primitive
+ out = ['["] ( ']  # output fragments: opening quote, then the group of permitted contents
+
+ def visit(node):  # appends the alternatives for one trie level to `out`
+ rejects = []  # child characters of this level, excluded from the fallback class below
+ first = True  # False once at least one alternative has been written at this level
+ for c in sorted(node.children.keys()):  # sorted so the output order is deterministic
+ child = node.children[c]
+ rejects.append(c)
+ if first:
+ first = False
+ else:
+ out.append(' | ')
+ out.append(f'[{c}]')  # NOTE(review): c is emitted verbatim; `]`, `\`, `^`, `-` presumably need escaping - confirm
+ if child.children:  # NOTE(review): this group is mandatory, so proper prefixes of a listed string look rejected too - confirm
+ out.append(f' (')
+ visit(child)
+ out.append(')')
+ elif child.is_end_of_string:
+ out.append(f' {char_rule}+')  # an excluded string ends here: require at least one more character
+ if node.children:
+ if not first:
+ out.append(' | ')
+ out.append(f'[^"{"".join(rejects)}] {char_rule}*')  # fallback: any other non-quote character, then anything
+ visit(trie)
+
+ out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')  # empty content is allowed unless "" itself is excluded
+ return ''.join(out)
+
def _add_rule(self, name, rule):
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
if esc_name not in self._rules or self._rules[esc_name] == rule:
return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
elif 'const' in schema:
- return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
+ return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
elif 'enum' in schema:
- rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
+ rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
return self._add_rule(rule_name, rule)
elif schema_type in (None, 'object') and \
self._add_primitive(dep, dep_rule)
return n
- def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
+ def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required]
- if additional_properties == True or isinstance(additional_properties, dict):
+ if additional_properties != False:
sub_name = f'{name}{"-" if name else ""}additional'
- value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
+ value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
+ self._add_primitive('value', PRIMITIVE_RULES['value'])
+ key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
+ else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
+
prop_kv_rule_names["*"] = self._add_rule(
f'{sub_name}-kv',
- self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
+ f'{key_rule} ":" space {value_rule}'
)
optional_props.append("*")
def get_recursive_refs(ks, first_is_optional):
[k, *rest] = ks
kv_rule_name = prop_kv_rule_names[k]
- if k == '*':
- res = self._add_rule(
- f'{name}{"-" if name else ""}additional-kvs',
- f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
- )
- elif first_is_optional:
- res = f'( "," space {kv_rule_name} )?'
+ comma_ref = f'( "," space {kv_rule_name} )'
+ if first_is_optional:
+ res = comma_ref + ('*' if k == '*' else '?')
else:
- res = kv_rule_name
+ res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
if len(rest) > 0:
res += ' ' + self._add_rule(
f'{name}{"-" if name else ""}{k}-rest',
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
}
+ _notStrings(strings) { // returns rule text matching a JSON string that is none of `strings`
+ class TrieNode { // character trie over the excluded strings
+ constructor() {
+ this.children = {}; // next character -> TrieNode
+ this.isEndOfString = false; // true when an excluded string ends at this node
+ }
+
+ insert(str) {
+ let node = this;
+ for (const c of str) {
+ node = node.children[c] = node.children[c] || new TrieNode(); // reuse the existing child or create it
+ }
+ node.isEndOfString = true;
+ }
+ }
+
+ const trie = new TrieNode(); // root node: represents the empty prefix
+ for (const s of strings) {
+ trie.insert(s);
+ }
+
+ const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']); // text used below to reference the `char` primitive
+ const out = ['["] ( ']; // output fragments: opening quote, then the group of permitted contents
+
+ const visit = (node) => { // appends the alternatives for one trie level to `out`
+ const rejects = []; // child characters of this level, excluded from the fallback class below
+ let first = true; // false once at least one alternative has been written at this level
+ for (const c of Object.keys(node.children).sort()) { // sorted so the output order is deterministic
+ const child = node.children[c];
+ rejects.push(c);
+ if (first) {
+ first = false;
+ } else {
+ out.push(' | ');
+ }
+ out.push(`[${c}]`); // NOTE(review): c is emitted verbatim; `]`, `\`, `^`, `-` presumably need escaping - confirm
+ if (Object.keys(child.children).length > 0) { // NOTE(review): this group is mandatory, so proper prefixes of a listed string look rejected too - confirm
+ out.push(' (');
+ visit(child);
+ out.push(')');
+ } else if (child.isEndOfString) {
+ out.push(` ${charRuleName}+`); // an excluded string ends here: require at least one more character
+ }
+ }
+ if (Object.keys(node.children).length > 0) {
+ if (!first) {
+ out.push(' | ');
+ }
+ out.push(`[^"${rejects.join('')}] ${charRuleName}*`); // fallback: any other non-quote character, then anything
+ }
+ };
+
+ visit(trie);
+
+ out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`); // empty content is allowed unless "" itself is excluded
+ return out.join('');
+ }
+
_resolveRef(ref) {
let refName = ref.split('/').pop();
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
} else if (Array.isArray(schemaType)) {
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
} else if ('const' in schema) {
- return this._addRule(ruleName, this._generateConstantRule(schema.const));
+ return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
} else if ('enum' in schema) {
- const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
+ const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
return this._addRule(ruleName, rule);
} else if ((schemaType === undefined || schemaType === 'object') &&
('properties' in schema ||
}
}
- return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
+ return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
const items = schema.items ?? schema.prefixItems;
if (Array.isArray(items)) {
const requiredProps = sortedProps.filter(k => required.has(k));
const optionalProps = sortedProps.filter(k => !required.has(k));
- if (typeof additionalProperties === 'object' || additionalProperties === true) {
+ if (additionalProperties !== false) {
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
- const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
+ const valueRule =
+ additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
+ : this._addPrimitive('value', PRIMITIVE_RULES['value']);
+
+ const key_rule =
+ sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
+ : this._addRule(`${subName}-k`, this._notStrings(sortedProps));
+
propKvRuleNames['*'] = this._addRule(
`${subName}-kv`,
- `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
+ `${key_rule} ":" space ${valueRule}`);
optionalProps.push('*');
}
const [k, ...rest] = ks;
const kvRuleName = propKvRuleNames[k];
let res;
- if (k === '*') {
- res = this._addRule(
- `${name ?? ''}${name ? '-' : ''}additional-kvs`,
- `${kvRuleName} ( "," space ` + kvRuleName + ` )*`
- )
- } else if (firstIsOptional) {
- res = `( "," space ${kvRuleName} )?`;
+ const commaRef = `( "," space ${kvRuleName} )`;
+ if (firstIsOptional) {
+ res = commaRef + (k === '*' ? '*' : '?');
} else {
- res = kvRuleName;
+ res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
}
if (rest.length > 0) {
res += ' ' + this._addRule(
Examples: Prompts
| response_format | n_predicted | re_content |
- | {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
+ | {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" |
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
| {"type": "json_object"} | 10 | \{ " Jacky. |
using json = nlohmann::ordered_json;
-//#define INCLUDE_FAILING_TESTS 1
-
static llama_grammar* build_grammar(const std::string & grammar_str) {
auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
)""",
// Passing strings
{
- "{}",
+ R"""({})""",
R"""({"foo": "bar"})""",
},
// Failing strings
"",
"[]",
"null",
- "\"\"",
+ R"""("")""",
"true",
}
);
test_schema(
"exotic formats (list)",
// Schema
- R"""(
- {
+ R"""({
"items": [
{ "format": "date" },
{ "format": "uuid" },
{ "format": "time" },
{ "format": "date-time" }
]
- }
- )""",
+ })""",
// Passing strings
{
// "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
test_schema(
"string",
// Schema
- R"""(
- {
- "type": "string"
- }
- )""",
+ R"""({
+ "type": "string"
+ })""",
// Passing strings
{
- "\"foo\"",
- "\"bar\"",
- "\"\"",
+ R"""("foo")""",
+ R"""("bar")""",
+ R"""("")""",
},
// Failing strings
{
- "{}",
- "\"foo\": \"bar\"",
+ R"""({})""",
+ R"""("foo": "bar")""",
}
);
test_schema(
"string w/ min length 1",
// Schema
- R"""(
- {
- "type": "string",
- "minLength": 1
- }
- )""",
+ R"""({
+ "type": "string",
+ "minLength": 1
+ })""",
// Passing strings
{
- "\"foo\"",
- "\"bar\"",
+ R"""("foo")""",
+ R"""("bar")""",
},
// Failing strings
{
- "\"\"",
- "{}",
- "\"foo\": \"bar\"",
+ R"""("")""",
+ R"""({})""",
+ R"""("foo": "bar")""",
}
);
test_schema(
"string w/ min length 3",
// Schema
- R"""(
- {
+ R"""({
"type": "string",
"minLength": 3
- }
- )""",
+ })""",
// Passing strings
{
- "\"foo\"",
- "\"bar\"",
- "\"foobar\"",
+ R"""("foo")""",
+ R"""("bar")""",
+ R"""("foobar")""",
},
// Failing strings
{
- "\"\"",
- "\"f\"",
- "\"fo\"",
+ R"""("")""",
+ R"""("f")""",
+ R"""("fo")""",
}
);
test_schema(
"string w/ max length",
// Schema
- R"""(
- {
- "type": "string",
- "maxLength": 3
- }
- )""",
+ R"""({
+ "type": "string",
+ "maxLength": 3
+ })""",
// Passing strings
{
- "\"foo\"",
- "\"bar\"",
- "\"\"",
- "\"f\"",
- "\"fo\"",
+ R"""("foo")""",
+ R"""("bar")""",
+ R"""("")""",
+ R"""("f")""",
+ R"""("fo")""",
},
// Failing strings
{
- "\"foobar\"",
+ R"""("foobar")""",
}
);
test_schema(
"string w/ min & max length",
// Schema
- R"""(
- {
- "type": "string",
- "minLength": 1,
- "maxLength": 4
- }
- )""",
+ R"""({
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 4
+ })""",
// Passing strings
{
- "\"foo\"",
- "\"bar\"",
- "\"f\"",
- "\"barf\"",
+ R"""("foo")""",
+ R"""("bar")""",
+ R"""("f")""",
+ R"""("barf")""",
},
// Failing strings
{
- "\"\"",
- "\"barfo\"",
- "\"foobar\"",
+ R"""("")""",
+ R"""("barfo")""",
+ R"""("foobar")""",
}
);
test_schema(
"boolean",
// Schema
- R"""(
- {
- "type": "boolean"
- }
- )""",
+ R"""({
+ "type": "boolean"
+ })""",
// Passing strings
{
"true",
},
// Failing strings
{
- "\"\"",
- "\"true\"",
- "True",
- "FALSE",
+ R"""("")""",
+ R"""("true")""",
+ R"""(True)""",
+ R"""(FALSE)""",
}
);
test_schema(
"integer",
// Schema
- R"""(
- {
- "type": "integer"
- }
- )""",
+ R"""({
+ "type": "integer"
+ })""",
// Passing strings
{
- "0",
- "12345",
- "1234567890123456"
+ R"""(0)""",
+ R"""(12345)""",
+ R"""(1234567890123456)""",
},
// Failing strings
{
- "",
- "01",
- "007",
- "12345678901234567"
+ R"""()""",
+ R"""(01)""",
+ R"""(007)""",
+ R"""(12345678901234567 )""",
}
);
test_schema(
"string const",
// Schema
- R"""(
- {
- "const": "foo"
- }
- )""",
+ R"""({
+ "const": "foo"
+ })""",
// Passing strings
{
- "\"foo\"",
+ R"""("foo")""",
},
// Failing strings
{
- "foo",
- "\"bar\"",
+ R"""(foo)""",
+ R"""("bar")""",
}
);
test_schema(
"non-string const",
// Schema
- R"""(
- {
- "const": true
- }
- )""",
+ R"""({
+ "const": true
+ })""",
// Passing strings
{
- "true",
+ R"""(true)""",
},
// Failing strings
{
- "",
- "foo",
- "\"true\"",
+ R"""()""",
+ R"""(foo)""",
+ R"""("true")""",
}
);
test_schema(
"non-string const",
// Schema
- R"""(
- {
- "enum": ["red", "amber", "green", null, 42, ["foo"]]
- }
- )""",
+ R"""({
+ "enum": ["red", "amber", "green", null, 42, ["foo"]]
+ })""",
// Passing strings
{
- "\"red\"",
- "null",
- "42",
- "[\"foo\"]",
+ R"""("red")""",
+ R"""(null)""",
+ R"""(42)""",
+ R"""(["foo"])""",
},
// Failing strings
{
- "",
- "420",
- "true",
- "foo",
+ R"""()""",
+ R"""(420)""",
+ R"""(true)""",
+ R"""(foo)""",
}
);
test_schema(
"min+max items",
// Schema
- R"""(
- {
- "items": {
- "type": ["number", "integer"]
- },
- "minItems": 3,
- "maxItems": 5
- }
- )""",
+ R"""({
+ "items": {
+ "type": ["number", "integer"]
+ },
+ "minItems": 3,
+ "maxItems": 5
+ })""",
// Passing strings
{
- "[1, 2, 3]",
- "[1, 2, 3, 4]",
- "[1, 2, 3, 4, 5]",
+ R"""([1, 2, 3])""",
+ R"""([1, 2, 3, 4])""",
+ R"""([1, 2, 3, 4, 5])""",
},
// Failing strings
{
- "[1, 2]",
- "[1, 2, 3, 4, 5, 6]",
- "1"
+ R"""([1, 2])""",
+ R"""([1, 2, 3, 4, 5, 6])""",
+ R"""(1)""",
}
);
test_schema(
"object properties",
// Schema
- R"""(
- {
+ R"""({
"type": "object",
"properties": {
"number": { "type": "number" },
"street_name": { "type": "string" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
}
- }
- )""",
+ })""",
// Passing strings
{
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
// "By extension, even an empty object is valid"
R"""({})""",
// "By default, providing additional properties is valid"
-#ifdef INCLUDE_FAILING_TESTS
- // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
-#endif
},
// Failing strings
{
}
);
+ test_schema(
+ "additional properties can't override other properties",
+ R"""({
+ "properties": {
+ "a": {"type": "integer"},
+ "b": {"type": "integer"}
+ },
+ "additionalProperties": true
+ })""",
+ // Passing strings
+ {
+ R"""({"a": 42})""",
+ R"""({"c": ""})""",
+ R"""({"a": 42, "c": ""})""",
+ R"""({"a_": ""})""",
+ },
+ // Failing strings
+ {
+ R"""()""",
+ R"""({"a": ""})""",
+ R"""({"a": "", "b": ""})""",
+ }
+ );
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
test_schema(
"object properties, additionalProperties: true",
// Schema
- R"""(
- {
+ R"""({
"type": "object",
"properties": {
"number": { "type": "number" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
},
"additionalProperties": true
- }
- )""",
+ })""",
// Passing strings
{
// "By extension, even an empty object is valid"
R"""({})""",
-#ifdef INCLUDE_FAILING_TESTS
- // TODO: Following line should pass and doesn't
R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
// "By default, leaving out properties is valid"
- // TODO: Following line should pass and doesn't
R"""({ "street_name": "Pennsylvania" })""",
- // TODO: Following line should pass and doesn't
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
// "By default, providing additional properties is valid"
- // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
- // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
-#endif
},
// Failing strings
{
test_schema(
"required + optional props each in original order",
// Schema
- R"""(
- {
+ R"""({
"type": "object",
"properties": {
"number": { "type": "number" },
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
},
"additionalProperties": false
- }
- )""",
+ })""",
// Passing strings
{
R"""({ "street_name": "Pennsylvania" })""",
R"""({ "number": 1600, "street_type":"Avenue"})""",
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
-#ifdef INCLUDE_FAILING_TESTS
- // TODO: Spaces should be permitted around enum values, but currently they fail to pass.
+ // Spaces are permitted around enum values
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
-#endif
},
// Failing strings
{
test_schema(
"required + optional props each in original order",
// Schema
- R"""(
- {
- "properties": {
- "b": {"type": "string"},
- "a": {"type": "string"},
- "d": {"type": "string"},
- "c": {"type": "string"}
- },
- "required": ["a", "b"],
- "additionalProperties": false
- }
- )""",
+ R"""({
+ "properties": {
+ "b": {"type": "string"},
+ "a": {"type": "string"},
+ "d": {"type": "string"},
+ "c": {"type": "string"}
+ },
+ "required": ["a", "b"],
+ "additionalProperties": false
+ })""",
// Passing strings
{
R"""({"b": "foo", "a": "bar"})""",
test_schema(
"required props",
// Schema
- R"""(
- {
+ R"""({
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://example.com/product.schema.json",
"title": "Product",
}
},
"required": [ "productId", "productName", "price" ]
- }
- )""",
+ })""",
// Passing strings
{
R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
"const": "foo"
})""",
R"""(
- root ::= "\"foo\""
+ root ::= "\"foo\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
"const": 123
})""",
R"""(
- root ::= "123"
+ root ::= "123" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
"enum": ["red", "amber", "green", null, 42, ["foo"]]
})""",
R"""(
- root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]"
+ root ::= ("\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]") space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
})""",
R"""(
additional-kv ::= string ":" space additional-value
- additional-kvs ::= additional-kv ( "," space additional-kv )*
additional-value ::= "[" space (number ("," space number)*)? "]" space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
- root ::= "{" space (additional-kvs )? "}" space
+ root ::= "{" space (additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
- additional-kv ::= string ":" space string
- additional-kvs ::= additional-kv ( "," space additional-kv )*
+ additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
+ additional-kv ::= additional-k ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
- root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
+ root ::= "{" space a-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
- a-rest ::= additional-kvs
- additional-kv ::= string ":" space number
- additional-kvs ::= additional-kv ( "," space additional-kv )*
+ a-rest ::= ( "," space additional-kv )*
+ additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
+ additional-kv ::= additional-k ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
- root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
+ root ::= "{" space (a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
- string ::= "\"" char* "\"" space
)"""
});
R"""({
"type": "object",
"properties": {
- "a": {"type": "number"},
- "b": {"type": "number"}
+ "and": {"type": "number"},
+ "also": {"type": "number"}
},
- "required": ["a"],
+ "required": ["and"],
"additionalProperties": {"type": "number"}
})""",
R"""(
- a-kv ::= "\"a\"" space ":" space number
- additional-kv ::= string ":" space number
- additional-kvs ::= additional-kv ( "," space additional-kv )*
- b-kv ::= "\"b\"" space ":" space number
- b-rest ::= additional-kvs
+ additional-k ::= ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
+ additional-kv ::= additional-k ":" space number
+ also-kv ::= "\"also\"" space ":" space number
+ also-rest ::= ( "," space additional-kv )*
+ and-kv ::= "\"and\"" space ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
- root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
+ root ::= "{" space and-kv ( "," space ( also-kv also-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
+ space ::= | " " | "\n" [ \t]{0,20}
+ )"""
+ });
+
+ test({
+ SUCCESS,
+ "optional props with empty name",
+ R"""({
+ "properties": {
+ "": {"type": "integer"},
+ "a": {"type": "integer"}
+ },
+ "additionalProperties": {"type": "integer"}
+ })""",
+ R"""(
+ -kv ::= "\"\"" space ":" space root
+ -rest ::= ( "," space a-kv )? a-rest
+ a-kv ::= "\"a\"" space ":" space integer
+ a-rest ::= ( "," space additional-kv )*
+ additional-k ::= ["] ( [a] char+ | [^"a] char* ) ["] space
+ additional-kv ::= additional-k ":" space integer
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+ integer ::= ("-"? integral-part) space
+ integral-part ::= [0] | [1-9] [0-9]{0,15}
+ root ::= ("-"? integral-part) space
+ root0 ::= "{" space (-kv -rest | a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space
+ space ::= | " " | "\n" [ \t]{0,20}
+ )"""
+ });
+
+ test({
+ SUCCESS,
+ "optional props with nested names",
+ R"""({
+ "properties": {
+ "a": {"type": "integer"},
+ "aa": {"type": "integer"}
+ },
+ "additionalProperties": {"type": "integer"}
+ })""",
+ R"""(
+ a-kv ::= "\"a\"" space ":" space integer
+ a-rest ::= ( "," space aa-kv )? aa-rest
+ aa-kv ::= "\"aa\"" space ":" space integer
+ aa-rest ::= ( "," space additional-kv )*
+ additional-k ::= ["] ( [a] ([a] char+ | [^"a] char*) | [^"a] char* )? ["] space
+ additional-kv ::= additional-k ":" space integer
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+ integer ::= ("-"? integral-part) space
+ integral-part ::= [0] | [1-9] [0-9]{0,15}
+ root ::= "{" space (a-kv a-rest | aa-kv aa-rest | additional-kv ( "," space additional-kv )* )? "}" space
+ space ::= | " " | "\n" [ \t]{0,20}
+ )"""
+ });
+
+ test({
+ SUCCESS,
+ "optional props with common prefix",
+ R"""({
+ "properties": {
+ "ab": {"type": "integer"},
+ "ac": {"type": "integer"}
+ },
+ "additionalProperties": {"type": "integer"}
+ })""",
+ R"""(
+ ab-kv ::= "\"ab\"" space ":" space integer
+ ab-rest ::= ( "," space ac-kv )? ac-rest
+ ac-kv ::= "\"ac\"" space ":" space integer
+ ac-rest ::= ( "," space additional-kv )*
+ additional-k ::= ["] ( [a] ([b] char+ | [c] char+ | [^"bc] char*) | [^"a] char* )? ["] space
+ additional-kv ::= additional-k ":" space integer
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+ integer ::= ("-"? integral-part) space
+ integral-part ::= [0] | [1-9] [0-9]{0,15}
+ root ::= "{" space (ab-kv ab-rest | ac-kv ac-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
- string ::= "\"" char* "\"" space
)"""
});
R"""(
alternative-0 ::= foo
alternative-1 ::= bar
- bar ::= "{" space (bar-b-kv )? "}" space
+ array ::= "[" space ( value ("," space value)* )? "]" space
+ bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space
+ bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space
+ bar-additional-kv ::= bar-additional-k ":" space value
bar-b-kv ::= "\"b\"" space ":" space number
+ bar-b-rest ::= ( "," space bar-additional-kv )*
+ boolean ::= ("true" | "false") space
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
- foo ::= "{" space (foo-a-kv )? "}" space
+ foo ::= "{" space (foo-a-kv foo-a-rest | foo-additional-kv ( "," space foo-additional-kv )* )? "}" space
foo-a-kv ::= "\"a\"" space ":" space number
+ foo-a-rest ::= ( "," space foo-additional-kv )*
+ foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
+ foo-additional-kv ::= foo-additional-k ":" space value
integral-part ::= [0] | [1-9] [0-9]{0,15}
+ null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+ object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= alternative-0 | alternative-1
space ::= | " " | "\n" [ \t]{0,20}
+ string ::= "\"" char* "\"" space
+ value ::= object | array | string | number | boolean | null
)"""
});
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
+ additional-k ::= ["] ( [a] char+ | [b] char+ | [c] char+ | [d] char+ | [^"abcd] char* )? ["] space
+ additional-kv ::= additional-k ":" space value
+ array ::= "[" space ( value ("," space value)* )? "]" space
b-kv ::= "\"b\"" space ":" space number
+ boolean ::= ("true" | "false") space
c-kv ::= "\"c\"" space ":" space number
+ c-rest ::= ( "," space additional-kv )*
+ char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
d-kv ::= "\"d\"" space ":" space number
- d-rest ::= ( "," space c-kv )?
+ d-rest ::= ( "," space c-kv )? c-rest
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
+ null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
- root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
+ object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+ root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv c-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
+ string ::= "\"" char* "\"" space
+ value ::= object | array | string | number | boolean | null
)"""
});