From 757c4df0e0e6a7abc5d036c2e46d67048d0271c6 Mon Sep 17 00:00:00 2001 From: ochafik Date: Wed, 26 Jun 2024 09:39:15 +0100 Subject: [PATCH] `json`: unified properties order across optional & required --- common/json-schema-to-grammar.cpp | 55 +++++++--------- examples/json_schema_to_grammar.py | 56 +++++++++------- .../server/public/json-schema-to-grammar.mjs | 66 +++++++++++-------- tests/test-grammar-integration.cpp | 27 ++++++++ tests/test-json-schema-to-grammar.cpp | 46 +++++++------ 5 files changed, 147 insertions(+), 103 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 881eb49e3389e..5e7dddcb7f06e 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -700,10 +700,9 @@ class SchemaConverter { const std::string & name, const json & additional_properties) { - std::vector required_props; - std::vector optional_props; - std::unordered_map prop_kv_rule_names; std::vector prop_names; + prop_names.reserve(properties.size() + 1); + std::unordered_map prop_kv_rule_names; for (const auto & kv : properties) { const auto &prop_name = kv.first; const auto &prop_schema = kv.second; @@ -713,11 +712,6 @@ class SchemaConverter { name + (name.empty() ? "" : "-") + prop_name + "-kv", format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name ); - if (required.find(prop_name) != required.end()) { - required_props.push_back(prop_name); - } else { - optional_props.push_back(prop_name); - } prop_names.push_back(prop_name); } if ((additional_properties.is_boolean() && additional_properties.get()) || additional_properties.is_object()) { @@ -731,23 +725,11 @@ class SchemaConverter { : _add_rule(sub_name + "-k", _not_strings(prop_names)); std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); prop_kv_rule_names["*"] = kv_rule; - optional_props.push_back("*"); + prop_names.push_back("*"); } std::string rule = "\"{\" space "; - for (size_t i = 0; i < required_props.size(); i++) { - if (i > 0) { - rule += " \",\" space "; - } - rule += prop_kv_rule_names[required_props[i]]; - } - - if (!optional_props.empty()) { - rule += " ("; - if (!required_props.empty()) { - rule += " \",\" space ( "; - } - + if (!prop_kv_rule_names.empty()) { std::function &, bool)> get_recursive_refs = [&](const std::vector & ks, bool first_is_optional) { std::string res; if (ks.empty()) { @@ -755,11 +737,15 @@ class SchemaConverter { } std::string k = ks[0]; std::string kv_rule_name = prop_kv_rule_names[k]; - std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; + std::string comma_ref = "\",\" space " + kv_rule_name; if (first_is_optional) { - res = comma_ref + (k == "*" ? "*" : "?"); + if (required.find(k) == required.end()) { + res = "( " + comma_ref + (k == "*" ? " )*" : " )?"); + } else { + res = comma_ref; + } } else { - res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); + res = kv_rule_name + (k == "*" ? " ( " + comma_ref + " )*" : ""); } if (ks.size() > 1) { res += " " + _add_rule( @@ -770,16 +756,21 @@ class SchemaConverter { return res; }; - for (size_t i = 0; i < optional_props.size(); i++) { - if (i > 0) { - rule += " | "; + std::vector alternatives; + auto has_required = false; + for (size_t i = 0; i < prop_names.size(); i++) { + alternatives.push_back(get_recursive_refs(std::vector(prop_names.begin() + i, prop_names.end()), false)); + if (required.find(prop_names[i]) != required.end()) { + has_required = true; + break; } - rule += get_recursive_refs(std::vector(optional_props.begin() + i, optional_props.end()), false); } - if (!required_props.empty()) { - rule += " )"; + auto alts = join(alternatives.begin(), alternatives.end(), " | "); + if (alternatives.size() > 1 || !has_required) { + rule += "( " + alts + (has_required ? " )" : " )?"); + } else { + rule += alts; } - rule += " )?"; } rule += " \"}\" space"; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 072a230f72408..9c40542fc6da1 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -6,6 +6,7 @@ import sys from typing import Any, List, Optional, Set, Tuple, Union + def _build_repetition(item_rule, min_items, max_items, separator_rule=None): if min_items == 0 and max_items == 1: @@ -677,49 +678,48 @@ def _add_primitive(self, name: str, rule: BuiltinRule): return n def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]): - prop_order = self._prop_order - # sort by position in prop_order (if specified) then by original order - sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] - prop_kv_rule_names = {} + prop_names = [] for prop_name, prop_schema in properties: prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}') prop_kv_rule_names[prop_name] = self._add_rule( f'{name}{"-" if name else ""}{prop_name}-kv', fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}' ) - required_props = [k for k in sorted_props if k in required] - optional_props = [k for k in sorted_props if k not in required] + prop_names.append(prop_name) + + prop_order = self._prop_order + if prop_order: + # sort by position in prop_order (if specified) then by original order + prop_names.sort(key=lambda k: (prop_order.get(k, float('inf')), prop_names.index(k))) if additional_properties is not None and additional_properties != False: sub_name = f'{name}{"-" if name else ""}additional' value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \ self._add_primitive('value', PRIMITIVE_RULES['value']) - key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ - else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) + key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not prop_names \ + else self._add_rule(f'{sub_name}-k', self._not_strings(prop_names)) prop_kv_rule_names["*"] = self._add_rule( f'{sub_name}-kv', f'{key_rule} ":" space {value_rule}' ) - optional_props.append("*") + prop_names.append("*") rule = '"{" space ' - rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props) - - if optional_props: - rule += ' (' - if required_props: - rule += ' "," space ( ' + if prop_kv_rule_names: def get_recursive_refs(ks, first_is_optional): [k, *rest] = ks kv_rule_name = prop_kv_rule_names[k] - comma_ref = f'( "," space {kv_rule_name} )' + comma_ref = f'"," space {kv_rule_name}' if first_is_optional: - res = comma_ref + ('*' if k == '*' else '?') + if k not in required: + res = '( ' + comma_ref + (' )*' if k == '*' else ' )?') + else: + res = comma_ref else: - res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '') + res = kv_rule_name + (' ( ' + comma_ref + " )*" if k == '*' else '') if len(rest) > 0: res += ' ' + self._add_rule( f'{name}{"-" if name else ""}{k}-rest', @@ -727,13 +727,19 @@ def get_recursive_refs(ks, first_is_optional): ) return res - rule += ' | '.join( - get_recursive_refs(optional_props[i:], first_is_optional=False) - for i in range(len(optional_props)) - ) - if required_props: - rule += ' )' - rule += ' )?' + alternatives = [] + has_required = False + for i, k in enumerate(prop_names): + alternatives.append(get_recursive_refs(prop_names[i:], first_is_optional=False)) + if k in required: + has_required = True + break + + alts = ' | '.join(alternatives) + if len(alternatives) > 1 or not has_required: + rule += '( ' + alts + (' )' if has_required else ' )?') + else: + rule += alts rule += ' "}" space' diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 7267f3f9c7fad..4e4b324c7f7ee 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -732,24 +732,26 @@ export class SchemaConverter { } _buildObjectRule(properties, required, name, additionalProperties) { - const propOrder = this._propOrder; - // sort by position in prop_order (if specified) then by original order - const sortedProps = properties.map(([k]) => k).sort((a, b) => { - const orderA = propOrder[a] || Infinity; - const orderB = propOrder[b] || Infinity; - return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b); - }); - const propKvRuleNames = {}; + const propNames = [] for (const [propName, propSchema] of properties) { const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`); propKvRuleNames[propName] = this._addRule( `${name ?? ''}${name ? '-' : ''}${propName}-kv`, `${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}` ); + propNames.push(propName); + } + + const propOrder = this._propOrder; + if (Object.keys(propOrder).length > 0) { + // sort by position in prop_order (if specified) then by original order + propNames.sort((a, b) => { + const orderA = propOrder[a] || Infinity; + const orderB = propOrder[b] || Infinity; + return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b); + }); } - const requiredProps = sortedProps.filter(k => required.has(k)); - const optionalProps = sortedProps.filter(k => !required.has(k)); if (additionalProperties) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; @@ -758,33 +760,32 @@ export class SchemaConverter { : this._addPrimitive('value', PRIMITIVE_RULES['value']); const key_rule = - sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) - : this._addRule(`${subName}-k`, this._notStrings(sortedProps)); + propNames.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) + : this._addRule(`${subName}-k`, this._notStrings(propNames)); propKvRuleNames['*'] = this._addRule( `${subName}-kv`, `${key_rule} ":" space ${valueRule}`); - optionalProps.push('*'); + propNames.push('*'); } let rule = '"{" space '; - rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space '); - - if (optionalProps.length > 0) { - rule += ' ('; - if (requiredProps.length > 0) { - rule += ' "," space ( '; - } + if (propNames.length > 0) { const getRecursiveRefs = (ks, firstIsOptional) => { const [k, ...rest] = ks; const kvRuleName = propKvRuleNames[k]; let res; - const commaRef = `( "," space ${kvRuleName} )`; + const commaRef = `"," space ${kvRuleName}`; if (firstIsOptional) { - res = commaRef + (k === '*' ? '*' : '?'); + // res = commaRef + (k === '*' ? '*' : '?'); + if (!required.has(k)) { + res = `( ${commaRef} )${k === '*' ? '*' : '?'}`; + } else { + res = commaRef; + } } else { - res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : ''); + res = kvRuleName + (k === '*' ? ' ( ' + commaRef + ' )*' : ''); } if (rest.length > 0) { res += ' ' + this._addRule( @@ -795,11 +796,22 @@ export class SchemaConverter { return res; }; - rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | '); - if (requiredProps.length > 0) { - rule += ' )'; + let hasRequired = false; + const alternatives = []; + for (let i = 0; i < propNames.length; i++) { + alternatives.push(getRecursiveRefs(propNames.slice(i), false)); + if (required.has(propNames[i])) { + hasRequired = true; + break; + } + } + + const alts = alternatives.join(' | '); + if (alternatives.length > 1 || !hasRequired) { + rule += `( ${alts} )${hasRequired ? '' : '?'}`; + } else { + rule += alts; } - rule += ' )?'; } rule += ' "}" space'; diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 975658f7953c9..63aae24735b2b 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -1112,6 +1112,33 @@ static void test_json_schema() { } ); + test_schema( + "object property order", + // Schema + R"""({ + "type": "object", + "properties": { + "a": { "type": "integer" }, + "b": { "type": "integer" }, + "c": { "type": "integer" }, + "d": { "type": "integer" } + }, + "required": ["b", "d"] + })""", + // Passing strings + { + R"""({ "b": 0, "d": 0 })""", + R"""({ "b": 0, "d": 0, "E": -1 })""", + R"""({ "a": 0, "b": 0, "c": 0, "d": 0 })""", + R"""({ "a": 0, "b": 0, "c": 0, "d": 0, "E": -1 })""", + }, + // Failing strings + { + R"""({ "E": -1, "b": 0, "d": 0 })""", + R"""({ "b": 0, "d": 0, "a": 0 })""", + } + ); + test_schema( "additional properties can't override other properties", R"""({ diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index 720a949c73644..8561deae00834 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -761,9 +761,11 @@ static void test_all(const std::string & lang, std::function