Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

json: unified properties order across optional & required #8133

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 23 additions & 32 deletions common/json-schema-to-grammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -700,10 +700,9 @@ class SchemaConverter {
const std::string & name,
const json & additional_properties)
{
std::vector<std::string> required_props;
std::vector<std::string> optional_props;
std::unordered_map<std::string, std::string> prop_kv_rule_names;
std::vector<std::string> prop_names;
prop_names.reserve(properties.size() + 1);
std::unordered_map<std::string, std::string> prop_kv_rule_names;
for (const auto & kv : properties) {
const auto &prop_name = kv.first;
const auto &prop_schema = kv.second;
Expand All @@ -713,11 +712,6 @@ class SchemaConverter {
name + (name.empty() ? "" : "-") + prop_name + "-kv",
format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name
);
if (required.find(prop_name) != required.end()) {
required_props.push_back(prop_name);
} else {
optional_props.push_back(prop_name);
}
prop_names.push_back(prop_name);
}
if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
Expand All @@ -731,35 +725,27 @@ class SchemaConverter {
: _add_rule(sub_name + "-k", _not_strings(prop_names));
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
prop_kv_rule_names["*"] = kv_rule;
optional_props.push_back("*");
prop_names.push_back("*");
}

std::string rule = "\"{\" space ";
for (size_t i = 0; i < required_props.size(); i++) {
if (i > 0) {
rule += " \",\" space ";
}
rule += prop_kv_rule_names[required_props[i]];
}

if (!optional_props.empty()) {
rule += " (";
if (!required_props.empty()) {
rule += " \",\" space ( ";
}

if (!prop_kv_rule_names.empty()) {
std::function<std::string(const std::vector<std::string> &, bool)> get_recursive_refs = [&](const std::vector<std::string> & ks, bool first_is_optional) {
std::string res;
if (ks.empty()) {
return res;
}
std::string k = ks[0];
std::string kv_rule_name = prop_kv_rule_names[k];
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
std::string comma_ref = "\",\" space " + kv_rule_name;
if (first_is_optional) {
res = comma_ref + (k == "*" ? "*" : "?");
if (required.find(k) == required.end()) {
res = "( " + comma_ref + (k == "*" ? " )*" : " )?");
} else {
res = comma_ref;
}
} else {
res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
res = kv_rule_name + (k == "*" ? " ( " + comma_ref + " )*" : "");
}
if (ks.size() > 1) {
res += " " + _add_rule(
Expand All @@ -770,16 +756,21 @@ class SchemaConverter {
return res;
};

for (size_t i = 0; i < optional_props.size(); i++) {
if (i > 0) {
rule += " | ";
std::vector<std::string> alternatives;
auto has_required = false;
for (size_t i = 0; i < prop_names.size(); i++) {
alternatives.push_back(get_recursive_refs(std::vector<std::string>(prop_names.begin() + i, prop_names.end()), false));
if (required.find(prop_names[i]) != required.end()) {
has_required = true;
break;
}
rule += get_recursive_refs(std::vector<std::string>(optional_props.begin() + i, optional_props.end()), false);
}
if (!required_props.empty()) {
rule += " )";
auto alts = join(alternatives.begin(), alternatives.end(), " | ");
if (alternatives.size() > 1 || !has_required) {
rule += "( " + alts + (has_required ? " )" : " )?");
} else {
rule += alts;
}
rule += " )?";
}

rule += " \"}\" space";
Expand Down
56 changes: 31 additions & 25 deletions examples/json_schema_to_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
from typing import Any, List, Optional, Set, Tuple, Union


def _build_repetition(item_rule, min_items, max_items, separator_rule=None):

if min_items == 0 and max_items == 1:
Expand Down Expand Up @@ -677,63 +678,68 @@ def _add_primitive(self, name: str, rule: BuiltinRule):
return n

def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
prop_order = self._prop_order
# sort by position in prop_order (if specified) then by original order
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]

prop_kv_rule_names = {}
prop_names = []
for prop_name, prop_schema in properties:
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
prop_kv_rule_names[prop_name] = self._add_rule(
f'{name}{"-" if name else ""}{prop_name}-kv',
fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}'
)
required_props = [k for k in sorted_props if k in required]
optional_props = [k for k in sorted_props if k not in required]
prop_names.append(prop_name)

prop_order = self._prop_order
if prop_order:
# sort by position in prop_order (if specified) then by original order
prop_names.sort(key=lambda k: (prop_order.get(k, float('inf')), prop_names.index(k)))

if additional_properties is not None and additional_properties != False:
sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
self._add_primitive('value', PRIMITIVE_RULES['value'])
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not prop_names \
else self._add_rule(f'{sub_name}-k', self._not_strings(prop_names))

prop_kv_rule_names["*"] = self._add_rule(
f'{sub_name}-kv',
f'{key_rule} ":" space {value_rule}'
)
optional_props.append("*")
prop_names.append("*")

rule = '"{" space '
rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props)

if optional_props:
rule += ' ('
if required_props:
rule += ' "," space ( '

if prop_kv_rule_names:
def get_recursive_refs(ks, first_is_optional):
[k, *rest] = ks
kv_rule_name = prop_kv_rule_names[k]
comma_ref = f'( "," space {kv_rule_name} )'
comma_ref = f'"," space {kv_rule_name}'
if first_is_optional:
res = comma_ref + ('*' if k == '*' else '?')
if k not in required:
res = '( ' + comma_ref + (' )*' if k == '*' else ' )?')
else:
res = comma_ref
else:
res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
res = kv_rule_name + (' ( ' + comma_ref + " )*" if k == '*' else '')
if len(rest) > 0:
res += ' ' + self._add_rule(
f'{name}{"-" if name else ""}{k}-rest',
get_recursive_refs(rest, first_is_optional=True)
)
return res

rule += ' | '.join(
get_recursive_refs(optional_props[i:], first_is_optional=False)
for i in range(len(optional_props))
)
if required_props:
rule += ' )'
rule += ' )?'
alternatives = []
has_required = False
for i, k in enumerate(prop_names):
alternatives.append(get_recursive_refs(prop_names[i:], first_is_optional=False))
if k in required:
has_required = True
break

alts = ' | '.join(alternatives)
if len(alternatives) > 1 or not has_required:
rule += '( ' + alts + (' )' if has_required else ' )?')
else:
rule += alts

rule += ' "}" space'

Expand Down
66 changes: 39 additions & 27 deletions examples/server/public/json-schema-to-grammar.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -732,24 +732,26 @@ export class SchemaConverter {
}

_buildObjectRule(properties, required, name, additionalProperties) {
const propOrder = this._propOrder;
// sort by position in prop_order (if specified) then by original order
const sortedProps = properties.map(([k]) => k).sort((a, b) => {
const orderA = propOrder[a] || Infinity;
const orderB = propOrder[b] || Infinity;
return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
});

const propKvRuleNames = {};
const propNames = []
for (const [propName, propSchema] of properties) {
const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
propKvRuleNames[propName] = this._addRule(
`${name ?? ''}${name ? '-' : ''}${propName}-kv`,
`${this._formatLiteral(JSON.stringify(propName))} space ":" space ${propRuleName}`
);
propNames.push(propName);
}

const propOrder = this._propOrder;
if (Object.keys(propOrder).length > 0) {
// sort by position in prop_order (if specified) then by original order
propNames.sort((a, b) => {
const orderA = propOrder[a] || Infinity;
const orderB = propOrder[b] || Infinity;
return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
});
}
const requiredProps = sortedProps.filter(k => required.has(k));
const optionalProps = sortedProps.filter(k => !required.has(k));

if (additionalProperties) {
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
Expand All @@ -758,33 +760,32 @@ export class SchemaConverter {
: this._addPrimitive('value', PRIMITIVE_RULES['value']);

const key_rule =
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
propNames.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
: this._addRule(`${subName}-k`, this._notStrings(propNames));

propKvRuleNames['*'] = this._addRule(
`${subName}-kv`,
`${key_rule} ":" space ${valueRule}`);
optionalProps.push('*');
propNames.push('*');
}

let rule = '"{" space ';
rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');

if (optionalProps.length > 0) {
rule += ' (';
if (requiredProps.length > 0) {
rule += ' "," space ( ';
}

if (propNames.length > 0) {
const getRecursiveRefs = (ks, firstIsOptional) => {
const [k, ...rest] = ks;
const kvRuleName = propKvRuleNames[k];
let res;
const commaRef = `( "," space ${kvRuleName} )`;
const commaRef = `"," space ${kvRuleName}`;
if (firstIsOptional) {
res = commaRef + (k === '*' ? '*' : '?');
// res = commaRef + (k === '*' ? '*' : '?');
if (!required.has(k)) {
res = `( ${commaRef} )${k === '*' ? '*' : '?'}`;
} else {
res = commaRef;
}
} else {
res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
res = kvRuleName + (k === '*' ? ' ( ' + commaRef + ' )*' : '');
}
if (rest.length > 0) {
res += ' ' + this._addRule(
Expand All @@ -795,11 +796,22 @@ export class SchemaConverter {
return res;
};

rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
if (requiredProps.length > 0) {
rule += ' )';
let hasRequired = false;
const alternatives = [];
for (let i = 0; i < propNames.length; i++) {
alternatives.push(getRecursiveRefs(propNames.slice(i), false));
if (required.has(propNames[i])) {
hasRequired = true;
break;
}
}

const alts = alternatives.join(' | ');
if (alternatives.length > 1 || !hasRequired) {
rule += `( ${alts} )${hasRequired ? '' : '?'}`;
} else {
rule += alts;
}
rule += ' )?';
}

rule += ' "}" space';
Expand Down
27 changes: 27 additions & 0 deletions tests/test-grammar-integration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,33 @@ static void test_json_schema() {
}
);

test_schema(
"object property order",
// Schema
R"""({
"type": "object",
"properties": {
"a": { "type": "integer" },
"b": { "type": "integer" },
"c": { "type": "integer" },
"d": { "type": "integer" }
},
"required": ["b", "d"]
})""",
// Passing strings
{
R"""({ "b": 0, "d": 0 })""",
R"""({ "b": 0, "d": 0, "E": -1 })""",
R"""({ "a": 0, "b": 0, "c": 0, "d": 0 })""",
R"""({ "a": 0, "b": 0, "c": 0, "d": 0, "E": -1 })""",
},
// Failing strings
{
R"""({ "E": -1, "b": 0, "d": 0 })""",
R"""({ "b": 0, "d": 0, "a": 0 })""",
}
);

test_schema(
"additional properties can't override other properties",
R"""({
Expand Down
Loading
Loading