Skip to content

Commit 557af61

Browse files
committed
Expose phrase-prefix queries via the built-in query parser
This proposes the less-than-imaginative syntax `field:"phrase ter"*` to perform a phrase prefix query against `field` using `phrase` and `ter` as the terms. The aim of this is to make this type of query more discoverable and simplify manual testing. I did consider exposing the `max_expansions` parameter similar to how slop is handled, but I think that this is rather something that should be configured via the query parser (similar to `set_field_boost` and `set_field_fuzzy`) as choosing it requires rather intimate knowledge of the backing index.
1 parent 62709b8 commit 557af61

File tree

4 files changed

+99
-30
lines changed

4 files changed

+99
-30
lines changed

query-grammar/src/query_grammar.rs

+26-9
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,22 @@ fn term_val<'a>() -> impl Parser<&'a str, Output = (Delimiter, String)> {
162162
}
163163

164164
fn term_query<'a>() -> impl Parser<&'a str, Output = UserInputLiteral> {
165-
(field_name(), term_val(), slop_val()).map(|(field_name, (delimiter, phrase), slop)| {
166-
UserInputLiteral {
165+
(field_name(), term_val(), slop_or_prefix_val()).map(
166+
|(field_name, (delimiter, phrase), (slop, prefix))| UserInputLiteral {
167167
field_name: Some(field_name),
168168
phrase,
169169
delimiter,
170170
slop,
171-
}
172-
})
171+
prefix,
172+
},
173+
)
174+
}
175+
176+
fn slop_or_prefix_val<'a>() -> impl Parser<&'a str, Output = (u32, bool)> {
177+
let prefix_val = char('*').map(|_ast| (0, true));
178+
let slop_val = slop_val().map(|slop| (slop, false));
179+
180+
prefix_val.or(slop_val)
173181
}
174182

175183
fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
@@ -186,11 +194,14 @@ fn slop_val<'a>() -> impl Parser<&'a str, Output = u32> {
186194

187195
fn literal<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
188196
let term_default_field =
189-
(term_val(), slop_val()).map(|((delimiter, phrase), slop)| UserInputLiteral {
190-
field_name: None,
191-
phrase,
192-
delimiter,
193-
slop,
197+
(term_val(), slop_or_prefix_val()).map(|((delimiter, phrase), (slop, prefix))| {
198+
UserInputLiteral {
199+
field_name: None,
200+
phrase,
201+
delimiter,
202+
slop,
203+
prefix,
204+
}
194205
});
195206

196207
attempt(term_query())
@@ -872,6 +883,12 @@ mod test {
872883
test_parse_query_to_ast_helper("\"a b\"~300^2", "(\"a b\"~300)^2");
873884
}
874885

886+
#[test]
887+
fn test_phrase_prefix() {
888+
test_parse_query_to_ast_helper("\"a b\"*", "\"a b\"*");
889+
test_parse_query_to_ast_helper("foo:\"a b\"*", "\"foo\":\"a b\"*");
890+
}
891+
875892
#[test]
876893
fn test_not_queries_are_consistent() {
877894
test_parse_query_to_ast_helper("tata -toto", "(*tata -toto)");

query-grammar/src/user_input_ast.rs

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pub struct UserInputLiteral {
6666
pub phrase: String,
6767
pub delimiter: Delimiter,
6868
pub slop: u32,
69+
pub prefix: bool,
6970
}
7071

7172
impl fmt::Debug for UserInputLiteral {
@@ -86,6 +87,8 @@ impl fmt::Debug for UserInputLiteral {
8687
}
8788
if self.slop > 0 {
8889
write!(formatter, "~{}", self.slop)?;
90+
} else if self.prefix {
91+
write!(formatter, "*")?;
8992
}
9093
Ok(())
9194
}

src/query/query_parser/logical_ast.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@ use crate::Score;
88
#[derive(Clone)]
99
pub enum LogicalLiteral {
1010
Term(Term),
11-
Phrase(Vec<(usize, Term)>, u32),
11+
Phrase {
12+
terms: Vec<(usize, Term)>,
13+
slop: u32,
14+
prefix: bool,
15+
},
1216
Range {
1317
field: String,
1418
value_type: Type,
@@ -79,10 +83,16 @@ impl fmt::Debug for LogicalLiteral {
7983
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
8084
match *self {
8185
LogicalLiteral::Term(ref term) => write!(formatter, "{term:?}"),
82-
LogicalLiteral::Phrase(ref terms, slop) => {
86+
LogicalLiteral::Phrase {
87+
ref terms,
88+
slop,
89+
prefix,
90+
} => {
8391
write!(formatter, "\"{terms:?}\"")?;
8492
if slop > 0 {
8593
write!(formatter, "~{slop:?}")
94+
} else if prefix {
95+
write!(formatter, "*")
8696
} else {
8797
Ok(())
8898
}

src/query/query_parser/query_parser.rs

+58-19
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,8 @@ use crate::core::json_utils::{
1515
use crate::core::Index;
1616
use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
1717
use crate::query::{
18-
AllQuery,
19-
BooleanQuery,
20-
BoostQuery,
21-
EmptyQuery,
22-
FuzzyTermQuery,
23-
Occur,
24-
PhraseQuery,
25-
Query,
26-
// RangeQuery,
27-
TermQuery,
28-
TermSetQuery,
18+
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery,
19+
PhraseQuery, Query, TermQuery, TermSetQuery,
2920
};
3021
use crate::schema::{
3122
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, JsonObjectOptions,
@@ -194,6 +185,10 @@ fn trim_ast(logical_ast: LogicalAst) -> Option<LogicalAst> {
194185
///
195186
/// Phrase terms support the `~` slop operator which allows to set the phrase's matching
196187
/// distance in words. `"big wolf"~1` will return documents containing the phrase `"big bad wolf"`.
188+
///
189+
/// Phrase terms also support the `*` prefix operator which switches the phrase's matching
190+
/// to consider all documents which contain the last term as a prefix, e.g. `"big bad wo"*` will
191+
/// match `"big bad wolf"`.
197192
#[derive(Clone)]
198193
pub struct QueryParser {
199194
schema: Schema,
@@ -446,6 +441,7 @@ impl QueryParser {
446441
json_path: &str,
447442
phrase: &str,
448443
slop: u32,
444+
prefix: bool,
449445
) -> Result<Vec<LogicalLiteral>, QueryParserError> {
450446
let field_entry = self.schema.get_field_entry(field);
451447
let field_type = field_entry.field_type();
@@ -503,6 +499,7 @@ impl QueryParser {
503499
field,
504500
phrase,
505501
slop,
502+
prefix,
506503
&text_analyzer,
507504
index_record_option,
508505
)?
@@ -661,9 +658,13 @@ impl QueryParser {
661658
self.compute_path_triplets_for_literal(&literal)?;
662659
let mut asts: Vec<LogicalAst> = Vec::new();
663660
for (field, json_path, phrase) in term_phrases {
664-
for ast in
665-
self.compute_logical_ast_for_leaf(field, json_path, phrase, literal.slop)?
666-
{
661+
for ast in self.compute_logical_ast_for_leaf(
662+
field,
663+
json_path,
664+
phrase,
665+
literal.slop,
666+
literal.prefix,
667+
)? {
667668
// Apply some field specific boost defined at the query parser level.
668669
let boost = self.field_boost(field);
669670
asts.push(LogicalAst::Leaf(Box::new(ast)).boost(boost));
@@ -753,9 +754,17 @@ fn convert_literal_to_query(
753754
Box::new(TermQuery::new(term, IndexRecordOption::WithFreqs))
754755
}
755756
}
756-
LogicalLiteral::Phrase(term_with_offsets, slop) => Box::new(
757-
PhraseQuery::new_with_offset_and_slop(term_with_offsets, slop),
758-
),
757+
LogicalLiteral::Phrase {
758+
terms,
759+
slop,
760+
prefix,
761+
} => {
762+
if prefix {
763+
Box::new(PhrasePrefixQuery::new_with_offset(terms))
764+
} else {
765+
Box::new(PhraseQuery::new_with_offset_and_slop(terms, slop))
766+
}
767+
}
759768
LogicalLiteral::Range {
760769
field,
761770
value_type,
@@ -774,6 +783,7 @@ fn generate_literals_for_str(
774783
field: Field,
775784
phrase: &str,
776785
slop: u32,
786+
prefix: bool,
777787
text_analyzer: &TextAnalyzer,
778788
index_record_option: IndexRecordOption,
779789
) -> Result<Option<LogicalLiteral>, QueryParserError> {
@@ -795,7 +805,11 @@ fn generate_literals_for_str(
795805
field_name.to_string(),
796806
));
797807
}
798-
Ok(Some(LogicalLiteral::Phrase(terms, slop)))
808+
Ok(Some(LogicalLiteral::Phrase {
809+
terms,
810+
slop,
811+
prefix,
812+
}))
799813
}
800814

801815
fn generate_literals_for_json_object(
@@ -841,7 +855,11 @@ fn generate_literals_for_json_object(
841855
field_name.to_string(),
842856
));
843857
}
844-
logical_literals.push(LogicalLiteral::Phrase(terms, 0));
858+
logical_literals.push(LogicalLiteral::Phrase {
859+
terms,
860+
slop: 0,
861+
prefix: false,
862+
});
845863
Ok(logical_literals)
846864
}
847865

@@ -1643,6 +1661,27 @@ mod test {
16431661
);
16441662
}
16451663

1664+
#[test]
1665+
pub fn test_phrase_prefix() {
1666+
test_parse_query_to_logical_ast_helper(
1667+
"\"big bad wo\"*",
1668+
r#"("[(0, Term(field=0, type=Str, "big")), (1, Term(field=0, type=Str, "bad")), (2, Term(field=0, type=Str, "wo"))]"* "[(0, Term(field=1, type=Str, "big")), (1, Term(field=1, type=Str, "bad")), (2, Term(field=1, type=Str, "wo"))]"*)"#,
1669+
false,
1670+
);
1671+
1672+
let query_parser = make_query_parser();
1673+
let query = query_parser.parse_query("\"big bad wo\"*").unwrap();
1674+
assert_eq!(
1675+
format!("{query:?}"),
1676+
"BooleanQuery { subqueries: [(Should, PhrasePrefixQuery { field: Field(0), \
1677+
phrase_terms: [(0, Term(field=0, type=Str, \"big\")), (1, Term(field=0, type=Str, \
1678+
\"bad\"))], prefix: (2, Term(field=0, type=Str, \"wo\")), max_expansions: 50 }), \
1679+
(Should, PhrasePrefixQuery { field: Field(1), phrase_terms: [(0, Term(field=1, \
1680+
type=Str, \"big\")), (1, Term(field=1, type=Str, \"bad\"))], prefix: (2, \
1681+
Term(field=1, type=Str, \"wo\")), max_expansions: 50 })] }"
1682+
);
1683+
}
1684+
16461685
#[test]
16471686
pub fn test_term_set_query() {
16481687
test_parse_query_to_logical_ast_helper(

0 commit comments

Comments
 (0)