Skip to content

Commit 4c58b00

Browse files
authored
allow slop in both directions (#2020)
* allow slop in both directions

  Allow slop in both directions, so that "big wolf"~3 can also match "wolf big".
  This also fixes #1934, where the docsets were reordered by size and no longer matched the terms.

* remove count

* add test for repeating tokens, deduplicate tests
1 parent 85df322 commit 4c58b00

File tree

2 files changed

+138
-199
lines changed

2 files changed

+138
-199
lines changed

src/query/phrase_query/mod.rs

+39-69
Original file line number | Diff line number | Diff line change
@@ -160,22 +160,7 @@ pub mod tests {
160160
#[test]
161161
pub fn test_phrase_score() -> crate::Result<()> {
162162
let index = create_index(&["a b c", "a b c a b"])?;
163-
let schema = index.schema();
164-
let text_field = schema.get_field("text").unwrap();
165-
let searcher = index.reader()?.searcher();
166-
let test_query = |texts: Vec<&str>| {
167-
let terms: Vec<Term> = texts
168-
.iter()
169-
.map(|text| Term::from_field_text(text_field, text))
170-
.collect();
171-
let phrase_query = PhraseQuery::new(terms);
172-
searcher
173-
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
174-
.expect("search should succeed")
175-
.scores()
176-
.to_vec()
177-
};
178-
let scores = test_query(vec!["a", "b"]);
163+
let scores = test_query(0, &index, vec!["a", "b"]);
179164
assert_nearly_equals!(scores[0], 0.40618482);
180165
assert_nearly_equals!(scores[1], 0.46844664);
181166
Ok(())
@@ -185,48 +170,48 @@ pub mod tests {
185170
#[test]
186171
pub fn test_phrase_score_with_slop() -> crate::Result<()> {
187172
let index = create_index(&["a c b", "a b c a b"])?;
188-
let schema = index.schema();
189-
let text_field = schema.get_field("text").unwrap();
190-
let searcher = index.reader().unwrap().searcher();
191-
let test_query = |texts: Vec<&str>| {
192-
let terms: Vec<Term> = texts
193-
.iter()
194-
.map(|text| Term::from_field_text(text_field, text))
195-
.collect();
196-
let mut phrase_query = PhraseQuery::new(terms);
197-
phrase_query.set_slop(1);
198-
searcher
199-
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
200-
.expect("search should succeed")
201-
.scores()
202-
.to_vec()
203-
};
204-
let scores = test_query(vec!["a", "b"]);
173+
let scores = test_query(1, &index, vec!["a", "b"]);
205174
assert_nearly_equals!(scores[0], 0.40618482);
206175
assert_nearly_equals!(scores[1], 0.46844664);
207176
Ok(())
208177
}
209178

179+
#[test]
180+
pub fn test_phrase_score_with_slop_bug() -> crate::Result<()> {
181+
let index = create_index(&["asdf asdf Captain Subject Wendy", "Captain"])?;
182+
let scores = test_query(1, &index, vec!["captain", "wendy"]);
183+
assert_eq!(scores.len(), 1);
184+
Ok(())
185+
}
186+
187+
fn test_query(slop: u32, index: &Index, texts: Vec<&str>) -> Vec<f32> {
188+
let text_field = index.schema().get_field("text").unwrap();
189+
let searcher = index.reader().unwrap().searcher();
190+
let terms: Vec<Term> = texts
191+
.iter()
192+
.map(|text| Term::from_field_text(text_field, text))
193+
.collect();
194+
let mut phrase_query = PhraseQuery::new(terms);
195+
phrase_query.set_slop(slop);
196+
searcher
197+
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
198+
.expect("search should succeed")
199+
.scores()
200+
.to_vec()
201+
}
202+
203+
#[test]
204+
pub fn test_phrase_score_with_slop_repeating() -> crate::Result<()> {
205+
let index = create_index(&["wendy subject subject captain", "Captain"])?;
206+
let scores = test_query(1, &index, vec!["wendy", "subject", "captain"]);
207+
assert_eq!(scores.len(), 1);
208+
Ok(())
209+
}
210+
210211
#[test]
211212
pub fn test_phrase_score_with_slop_size() -> crate::Result<()> {
212213
let index = create_index(&["a b e c", "a e e e c", "a e e e e c"])?;
213-
let schema = index.schema();
214-
let text_field = schema.get_field("text").unwrap();
215-
let searcher = index.reader().unwrap().searcher();
216-
let test_query = |texts: Vec<&str>| {
217-
let terms: Vec<Term> = texts
218-
.iter()
219-
.map(|text| Term::from_field_text(text_field, text))
220-
.collect();
221-
let mut phrase_query = PhraseQuery::new(terms);
222-
phrase_query.set_slop(3);
223-
searcher
224-
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
225-
.expect("search should succeed")
226-
.scores()
227-
.to_vec()
228-
};
229-
let scores = test_query(vec!["a", "c"]);
214+
let scores = test_query(3, &index, vec!["a", "c"]);
230215
assert_nearly_equals!(scores[0], 0.29086056);
231216
assert_nearly_equals!(scores[1], 0.26706287);
232217
Ok(())
@@ -237,31 +222,16 @@ pub mod tests {
237222
let index = create_index(&[
238223
"a e b e c",
239224
"a e e e e e b e e e e c",
240-
"a c b",
225+
"a c b", // also matches
241226
"a c e b e",
242227
"a e c b",
243228
"a e b c",
244229
])?;
245-
let schema = index.schema();
246-
let text_field = schema.get_field("text").unwrap();
247-
let searcher = index.reader().unwrap().searcher();
248-
let test_query = |texts: Vec<&str>| {
249-
let terms: Vec<Term> = texts
250-
.iter()
251-
.map(|text| Term::from_field_text(text_field, text))
252-
.collect();
253-
let mut phrase_query = PhraseQuery::new(terms);
254-
phrase_query.set_slop(3);
255-
searcher
256-
.search(&phrase_query, &TEST_COLLECTOR_WITH_SCORE)
257-
.expect("search should succeed")
258-
.scores()
259-
.to_vec()
260-
};
261-
let scores = test_query(vec!["a", "b", "c"]);
230+
let scores = test_query(3, &index, vec!["a", "b", "c"]);
262231
// The first and last matches.
263232
assert_nearly_equals!(scores[0], 0.23091172);
264-
assert_nearly_equals!(scores[1], 0.25024384);
233+
assert_nearly_equals!(scores[1], 0.27310878);
234+
assert_nearly_equals!(scores[3], 0.25024384);
265235
Ok(())
266236
}
267237

0 commit comments

Comments
 (0)