Skip to content

Commit 0eafbaa

Browse files
authored
fix slop (#2031)
Fix slop by carrying slop so far for multiterms. Define slop contract in the API
1 parent d3357a8 commit 0eafbaa

File tree

3 files changed

+358
-14
lines changed

3 files changed

+358
-14
lines changed

src/query/phrase_query/mod.rs

+36
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,20 @@ pub mod tests {
184184
Ok(())
185185
}
186186

187+
#[test]
188+
pub fn test_phrase_score_with_slop_bug_2() -> crate::Result<()> {
189+
// Regression test: these multi-term slop cases failed before slop was carried across terms.
190+
let index = create_index(&["a x b x c", "a a c"])?;
191+
let scores = test_query(2, &index, vec!["a", "b", "c"]);
192+
assert_eq!(scores.len(), 1);
193+
194+
let index = create_index(&["a x b x c", "b c c"])?;
195+
let scores = test_query(2, &index, vec!["a", "b", "c"]);
196+
assert_eq!(scores.len(), 1);
197+
198+
Ok(())
199+
}
200+
187201
fn test_query(slop: u32, index: &Index, texts: Vec<&str>) -> Vec<f32> {
188202
let text_field = index.schema().get_field("text").unwrap();
189203
let searcher = index.reader().unwrap().searcher();
@@ -212,11 +226,33 @@ pub mod tests {
212226
pub fn test_phrase_score_with_slop_size() -> crate::Result<()> {
213227
let index = create_index(&["a b e c", "a e e e c", "a e e e e c"])?;
214228
let scores = test_query(3, &index, vec!["a", "c"]);
229+
assert_eq!(scores.len(), 2);
215230
assert_nearly_equals!(scores[0], 0.29086056);
216231
assert_nearly_equals!(scores[1], 0.26706287);
217232
Ok(())
218233
}
219234

235+
#[test]
236+
pub fn test_phrase_slop() -> crate::Result<()> {
237+
let index = create_index(&["a x b c"])?;
238+
let scores = test_query(1, &index, vec!["a", "b", "c"]);
239+
assert_eq!(scores.len(), 1);
240+
241+
let index = create_index(&["a x b x c"])?;
242+
let scores = test_query(1, &index, vec!["a", "b", "c"]);
243+
assert_eq!(scores.len(), 0);
244+
245+
let index = create_index(&["a b"])?;
246+
let scores = test_query(1, &index, vec!["b", "a"]);
247+
assert_eq!(scores.len(), 0);
248+
249+
let index = create_index(&["a b"])?;
250+
let scores = test_query(2, &index, vec!["b", "a"]);
251+
assert_eq!(scores.len(), 1);
252+
253+
Ok(())
254+
}
255+
220256
#[test]
221257
pub fn test_phrase_score_with_slop_ordering() -> crate::Result<()> {
222258
let index = create_index(&[

src/query/phrase_query/phrase_query.rs

+10
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ impl PhraseQuery {
6666
/// Slop allowed for the phrase.
6767
///
6868
/// The query will match if its terms are separated by `slop` terms at most.
69+
/// The slop can be considered a budget between all terms.
70+
/// E.g. "A B C" with slop 1 allows "A X B C", "A B X C", but not "A X B X C".
71+
///
72+
/// Transposition costs 2, e.g. "A B" with slop 1 will not match "B A" but it would with slop 2.
73+
/// Transposition is not a special case: in the example above A is moved 1 position and B is
74+
/// moved 1 position, so the slop is 2.
75+
///
76+
/// As a result slop works in both directions, so the order of the terms may change as long as
77+
/// they respect the slop.
78+
///
6979
/// By default the slop is 0 meaning query terms need to be adjacent.
7080
pub fn set_slop(&mut self, value: u32) {
7181
self.slop = value;

0 commit comments

Comments
 (0)