Skip to content

Commit ed521ed

Browse files
trinity-1686aPSeitz
authored andcommitted
allow some mixing of occur and bool in strict query parser (#2323)
* allow some mixing of occur and bool in strict query parser * allow all mixing of binary and occur in strict parser
1 parent 5a197d0 commit ed521ed

File tree

1 file changed

+85
-76
lines changed

1 file changed

+85
-76
lines changed

query-grammar/src/query_grammar.rs

+85-76
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use nom::character::complete::{
77
};
88
use nom::combinator::{eof, map, map_res, opt, peek, recognize, value, verify};
99
use nom::error::{Error, ErrorKind};
10-
use nom::multi::{many0, many1, separated_list0, separated_list1};
10+
use nom::multi::{many0, many1, separated_list0};
1111
use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
1212
use nom::IResult;
1313

@@ -786,27 +786,23 @@ fn binary_operand(inp: &str) -> IResult<&str, BinaryOperand> {
786786
}
787787

788788
fn aggregate_binary_expressions(
789-
left: UserInputAst,
790-
others: Vec<(BinaryOperand, UserInputAst)>,
791-
) -> UserInputAst {
792-
let mut dnf: Vec<Vec<UserInputAst>> = vec![vec![left]];
793-
for (operator, operand_ast) in others {
794-
match operator {
795-
BinaryOperand::And => {
796-
if let Some(last) = dnf.last_mut() {
797-
last.push(operand_ast);
798-
}
799-
}
800-
BinaryOperand::Or => {
801-
dnf.push(vec![operand_ast]);
802-
}
803-
}
804-
}
805-
if dnf.len() == 1 {
806-
UserInputAst::and(dnf.into_iter().next().unwrap()) //< safe
789+
left: (Option<Occur>, UserInputAst),
790+
others: Vec<(Option<BinaryOperand>, Option<Occur>, UserInputAst)>,
791+
) -> Result<UserInputAst, LenientErrorInternal> {
792+
let mut leafs = Vec::with_capacity(others.len() + 1);
793+
leafs.push((None, left.0, Some(left.1)));
794+
leafs.extend(
795+
others
796+
.into_iter()
797+
.map(|(operand, occur, ast)| (operand, occur, Some(ast))),
798+
);
799+
// the parameters we pass should statically guarantee we can't get errors
800+
// (no prefix BinaryOperand is provided)
801+
let (res, mut errors) = aggregate_infallible_expressions(leafs);
802+
if errors.is_empty() {
803+
Ok(res)
807804
} else {
808-
let conjunctions = dnf.into_iter().map(UserInputAst::and).collect();
809-
UserInputAst::or(conjunctions)
805+
Err(errors.swap_remove(0))
810806
}
811807
}
812808

@@ -822,30 +818,10 @@ fn aggregate_infallible_expressions(
822818
return (UserInputAst::empty_query(), err);
823819
}
824820

825-
let use_operand = leafs.iter().any(|(operand, _, _)| operand.is_some());
826-
let all_operand = leafs
827-
.iter()
828-
.skip(1)
829-
.all(|(operand, _, _)| operand.is_some());
830821
let early_operand = leafs
831822
.iter()
832823
.take(1)
833824
.all(|(operand, _, _)| operand.is_some());
834-
let use_occur = leafs.iter().any(|(_, occur, _)| occur.is_some());
835-
836-
if use_operand && use_occur {
837-
err.push(LenientErrorInternal {
838-
pos: 0,
839-
message: "Use of mixed occur and boolean operator".to_string(),
840-
});
841-
}
842-
843-
if use_operand && !all_operand {
844-
err.push(LenientErrorInternal {
845-
pos: 0,
846-
message: "Missing boolean operator".to_string(),
847-
});
848-
}
849825

850826
if early_operand {
851827
err.push(LenientErrorInternal {
@@ -872,15 +848,31 @@ fn aggregate_infallible_expressions(
872848
Some(BinaryOperand::And) => Some(Occur::Must),
873849
_ => Some(Occur::Should),
874850
};
875-
clauses.push(vec![(occur.or(default_op), ast.clone())]);
851+
if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
852+
// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
853+
clauses.push(vec![(
854+
Some(Occur::Should),
855+
ast.clone().unary(Occur::MustNot),
856+
)])
857+
} else {
858+
clauses.push(vec![(occur.or(default_op), ast.clone())]);
859+
}
876860
}
877861
None => {
878862
let default_op = match next_operator {
879863
Some(BinaryOperand::And) => Some(Occur::Must),
880864
Some(BinaryOperand::Or) => Some(Occur::Should),
881865
None => None,
882866
};
883-
clauses.push(vec![(occur.or(default_op), ast.clone())])
867+
if occur == &Some(Occur::MustNot) && default_op == Some(Occur::Should) {
868+
// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
869+
clauses.push(vec![(
870+
Some(Occur::Should),
871+
ast.clone().unary(Occur::MustNot),
872+
)])
873+
} else {
874+
clauses.push(vec![(occur.or(default_op), ast.clone())])
875+
}
884876
}
885877
}
886878
}
@@ -897,7 +889,12 @@ fn aggregate_infallible_expressions(
897889
}
898890
}
899891
Some(BinaryOperand::Or) => {
900-
clauses.push(vec![(last_occur.or(Some(Occur::Should)), last_ast)]);
892+
if last_occur == Some(Occur::MustNot) {
893+
// if occur is MustNot *and* operation is OR, we synthetize a ShouldNot
894+
clauses.push(vec![(Some(Occur::Should), last_ast.unary(Occur::MustNot))]);
895+
} else {
896+
clauses.push(vec![(last_occur.or(Some(Occur::Should)), last_ast)]);
897+
}
901898
}
902899
None => clauses.push(vec![(last_occur, last_ast)]),
903900
}
@@ -923,35 +920,29 @@ fn aggregate_infallible_expressions(
923920
}
924921
}
925922

926-
fn operand_leaf(inp: &str) -> IResult<&str, (BinaryOperand, UserInputAst)> {
927-
tuple((
928-
terminated(binary_operand, multispace0),
929-
terminated(boosted_leaf, multispace0),
930-
))(inp)
923+
fn operand_leaf(inp: &str) -> IResult<&str, (Option<BinaryOperand>, Option<Occur>, UserInputAst)> {
924+
map(
925+
tuple((
926+
terminated(opt(binary_operand), multispace0),
927+
terminated(occur_leaf, multispace0),
928+
)),
929+
|(operand, (occur, ast))| (operand, occur, ast),
930+
)(inp)
931931
}
932932

933933
fn ast(inp: &str) -> IResult<&str, UserInputAst> {
934-
let boolean_expr = map(
935-
separated_pair(boosted_leaf, multispace1, many1(operand_leaf)),
934+
let boolean_expr = map_res(
935+
separated_pair(occur_leaf, multispace1, many1(operand_leaf)),
936936
|(left, right)| aggregate_binary_expressions(left, right),
937937
);
938-
let whitespace_separated_leaves = map(separated_list1(multispace1, occur_leaf), |subqueries| {
939-
if subqueries.len() == 1 {
940-
let (occur_opt, ast) = subqueries.into_iter().next().unwrap();
941-
match occur_opt.unwrap_or(Occur::Should) {
942-
Occur::Must | Occur::Should => ast,
943-
Occur::MustNot => UserInputAst::Clause(vec![(Some(Occur::MustNot), ast)]),
944-
}
938+
let single_leaf = map(occur_leaf, |(occur, ast)| {
939+
if occur == Some(Occur::MustNot) {
940+
ast.unary(Occur::MustNot)
945941
} else {
946-
UserInputAst::Clause(subqueries.into_iter().collect())
942+
ast
947943
}
948944
});
949-
950-
delimited(
951-
multispace0,
952-
alt((boolean_expr, whitespace_separated_leaves)),
953-
multispace0,
954-
)(inp)
945+
delimited(multispace0, alt((boolean_expr, single_leaf)), multispace0)(inp)
955946
}
956947

957948
fn ast_infallible(inp: &str) -> JResult<&str, UserInputAst> {
@@ -1155,21 +1146,39 @@ mod test {
11551146
test_parse_query_to_ast_helper("a OR b", "(?a ?b)");
11561147
test_parse_query_to_ast_helper("a OR b AND c", "(?a ?(+b +c))");
11571148
test_parse_query_to_ast_helper("a AND b AND c", "(+a +b +c)");
1158-
test_is_parse_err("a OR b aaa", "(?a ?b *aaa)");
1159-
test_is_parse_err("a AND b aaa", "(?(+a +b) *aaa)");
1160-
test_is_parse_err("aaa a OR b ", "(*aaa ?a ?b)");
1161-
test_is_parse_err("aaa ccc a OR b ", "(*aaa *ccc ?a ?b)");
1162-
test_is_parse_err("aaa a AND b ", "(*aaa ?(+a +b))");
1163-
test_is_parse_err("aaa ccc a AND b ", "(*aaa *ccc ?(+a +b))");
1149+
test_parse_query_to_ast_helper("a OR b aaa", "(?a ?b *aaa)");
1150+
test_parse_query_to_ast_helper("a AND b aaa", "(?(+a +b) *aaa)");
1151+
test_parse_query_to_ast_helper("aaa a OR b ", "(*aaa ?a ?b)");
1152+
test_parse_query_to_ast_helper("aaa ccc a OR b ", "(*aaa *ccc ?a ?b)");
1153+
test_parse_query_to_ast_helper("aaa a AND b ", "(*aaa ?(+a +b))");
1154+
test_parse_query_to_ast_helper("aaa ccc a AND b ", "(*aaa *ccc ?(+a +b))");
11641155
}
11651156

11661157
#[test]
11671158
fn test_parse_mixed_bool_occur() {
1168-
test_is_parse_err("a OR b +aaa", "(?a ?b +aaa)");
1169-
test_is_parse_err("a AND b -aaa", "(?(+a +b) -aaa)");
1170-
test_is_parse_err("+a OR +b aaa", "(+a +b *aaa)");
1171-
test_is_parse_err("-a AND -b aaa", "(?(-a -b) *aaa)");
1172-
test_is_parse_err("-aaa +ccc -a OR b ", "(-aaa +ccc -a ?b)");
1159+
test_parse_query_to_ast_helper("+a OR +b", "(+a +b)");
1160+
1161+
test_parse_query_to_ast_helper("a AND -b", "(+a -b)");
1162+
test_parse_query_to_ast_helper("-a AND b", "(-a +b)");
1163+
test_parse_query_to_ast_helper("a AND NOT b", "(+a +(-b))");
1164+
test_parse_query_to_ast_helper("NOT a AND b", "(+(-a) +b)");
1165+
1166+
test_parse_query_to_ast_helper("a AND NOT b AND c", "(+a +(-b) +c)");
1167+
test_parse_query_to_ast_helper("a AND -b AND c", "(+a -b +c)");
1168+
1169+
test_parse_query_to_ast_helper("a OR -b", "(?a ?(-b))");
1170+
test_parse_query_to_ast_helper("-a OR b", "(?(-a) ?b)");
1171+
test_parse_query_to_ast_helper("a OR NOT b", "(?a ?(-b))");
1172+
test_parse_query_to_ast_helper("NOT a OR b", "(?(-a) ?b)");
1173+
1174+
test_parse_query_to_ast_helper("a OR NOT b OR c", "(?a ?(-b) ?c)");
1175+
test_parse_query_to_ast_helper("a OR -b OR c", "(?a ?(-b) ?c)");
1176+
1177+
test_parse_query_to_ast_helper("a OR b +aaa", "(?a ?b +aaa)");
1178+
test_parse_query_to_ast_helper("a AND b -aaa", "(?(+a +b) -aaa)");
1179+
test_parse_query_to_ast_helper("+a OR +b aaa", "(+a +b *aaa)");
1180+
test_parse_query_to_ast_helper("-a AND -b aaa", "(?(-a -b) *aaa)");
1181+
test_parse_query_to_ast_helper("-aaa +ccc -a OR b ", "(-aaa +ccc ?(-a) ?b)");
11731182
}
11741183

11751184
#[test]

0 commit comments

Comments
 (0)