Skip to content

Commit 5a618c1

Browse files
committed
Code clean up and more tests
1 parent ae7ea5c commit 5a618c1

File tree

3 files changed

+157
-55
lines changed

3 files changed

+157
-55
lines changed

columnar/src/dynamic_column.rs

-13
Original file line numberDiff line numberDiff line change
@@ -83,19 +83,6 @@ impl DynamicColumn {
8383
}
8484
}
8585

86-
pub fn num_docs(&self) -> u32 {
87-
match self {
88-
DynamicColumn::Bool(c) => c.num_docs(),
89-
DynamicColumn::I64(c) => c.num_docs(),
90-
DynamicColumn::U64(c) => c.num_docs(),
91-
DynamicColumn::F64(c) => c.num_docs(),
92-
DynamicColumn::IpAddr(c) => c.num_docs(),
93-
DynamicColumn::DateTime(c) => c.num_docs(),
94-
DynamicColumn::Bytes(c) => c.ords().num_docs(),
95-
DynamicColumn::Str(c) => c.ords().num_docs(),
96-
}
97-
}
98-
9986
pub fn coerce_numerical(self, target_numerical_type: NumericalType) -> Option<Self> {
10087
match target_numerical_type {
10188
NumericalType::I64 => self.coerce_to_i64(),

src/fastfield/readers.rs

+43
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ impl FastFieldReaders {
354354

355355
#[cfg(test)]
356356
mod tests {
357+
use columnar::ColumnType;
358+
357359
use crate::schema::{JsonObjectOptions, Schema, FAST};
358360
use crate::{Document, Index};
359361

@@ -433,4 +435,45 @@ mod tests {
433435
Some("_dyna\u{1}notinschema\u{1}attr\u{1}color".to_string())
434436
);
435437
}
438+
439+
#[test]
440+
fn test_fast_field_reader_dynamic_column_handles() {
441+
let mut schema_builder = Schema::builder();
442+
let id = schema_builder.add_u64_field("id", FAST);
443+
let json = schema_builder.add_json_field("json", FAST);
444+
let schema = schema_builder.build();
445+
let index = Index::create_in_ram(schema);
446+
let mut index_writer = index.writer_for_tests().unwrap();
447+
index_writer
448+
.add_document(doc!(id=> 1u64, json => json!({"foo": 42})))
449+
.unwrap();
450+
index_writer
451+
.add_document(doc!(id=> 2u64, json => json!({"foo": true})))
452+
.unwrap();
453+
index_writer
454+
.add_document(doc!(id=> 3u64, json => json!({"foo": "bar"})))
455+
.unwrap();
456+
index_writer.commit().unwrap();
457+
let reader = index.reader().unwrap();
458+
let searcher = reader.searcher();
459+
let reader = searcher.segment_reader(0u32);
460+
let fast_fields = reader.fast_fields();
461+
let id_columns = fast_fields.dynamic_column_handles("id").unwrap();
462+
assert_eq!(id_columns.len(), 1);
463+
assert_eq!(id_columns.first().unwrap().column_type(), ColumnType::U64);
464+
465+
let foo_columns = fast_fields.dynamic_column_handles("json.foo").unwrap();
466+
assert_eq!(foo_columns.len(), 3);
467+
assert!(foo_columns
468+
.iter()
469+
.any(|column| column.column_type() == ColumnType::I64));
470+
assert!(foo_columns
471+
.iter()
472+
.any(|column| column.column_type() == ColumnType::Bool));
473+
assert!(foo_columns
474+
.iter()
475+
.any(|column| column.column_type() == ColumnType::Str));
476+
477+
println!("*** {:?}", fast_fields.columnar().list_columns());
478+
}
436479
}

src/query/exist_query.rs

+114-42
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,19 @@ use crate::query::explanation::does_not_match;
99
use crate::query::{EnableScoring, Explanation, Query, Scorer, Weight};
1010
use crate::{DocId, Score, TantivyError};
1111

12-
/// Query that matches all of the documents.
12+
/// Query that matches all documents with a non-null value in the specified field.
1313
///
14-
/// All of the document get the score 1.0.
14+
/// All of the matched documents get the score 1.0.
1515
#[derive(Clone, Debug)]
1616
pub struct ExistsQuery {
1717
field_name: String,
1818
}
1919

2020
impl ExistsQuery {
21-
/// Creates a new `RangeQuery` from bounded start and end terms.
21+
#![allow(dead_code)]
22+
/// Creates a new `ExistsQuery` from the given field.
2223
///
23-
/// If the value type is not correct, something may go terribly wrong when
24-
/// the `Weight` object is created.
24+
/// This query matches all documents with at least one non-null value in the specified field.
2525
pub fn new_exists_query(field: String) -> ExistsQuery {
2626
ExistsQuery { field_name: field }
2727
}
@@ -69,9 +69,9 @@ impl Weight for ExistsWeight {
6969
}
7070
if !non_empty_columns.is_empty() {
7171
let docset = ExistsDocSet::new(non_empty_columns, reader.max_doc());
72-
return Ok(Box::new(ConstScorer::new(docset, boost)));
72+
Ok(Box::new(ConstScorer::new(docset, boost)))
7373
} else {
74-
return Ok(Box::new(EmptyScorer));
74+
Ok(Box::new(EmptyScorer))
7575
}
7676
}
7777

@@ -102,20 +102,18 @@ impl ExistsDocSet {
102102
}
103103

104104
fn find_next(&mut self) -> DocId {
105-
// TODO: can this be optimized?
106105
while self.doc < self.max_doc {
107106
if self
108107
.columns
109108
.iter()
110-
.find(|col| col.column_index().has_value(self.doc))
111-
.is_some()
109+
.any(|col| col.column_index().has_value(self.doc))
112110
{
113111
return self.doc;
114112
}
115113
self.doc += 1;
116114
}
117115
self.doc = TERMINATED;
118-
return TERMINATED;
116+
TERMINATED
119117
}
120118
}
121119

@@ -126,7 +124,7 @@ impl DocSet for ExistsDocSet {
126124
}
127125

128126
fn size_hint(&self) -> u32 {
129-
0 // heuristic possible by checking number of hits when fetching a block
127+
0
130128
}
131129

132130
fn doc(&self) -> DocId {
@@ -136,10 +134,15 @@ impl DocSet for ExistsDocSet {
136134

137135
#[cfg(test)]
138136
mod tests {
137+
use std::net::Ipv6Addr;
138+
139+
use common::DateTime;
140+
use time::OffsetDateTime;
141+
139142
use crate::collector::Count;
140143
use crate::query::exist_query::ExistsQuery;
141-
use crate::schema::{Schema, FAST, INDEXED, STRING, TEXT};
142-
use crate::{doc, Index};
144+
use crate::schema::{Facet, FacetOptions, Schema, FAST, INDEXED, STRING, TEXT};
145+
use crate::{doc, Index, Searcher};
143146

144147
#[test]
145148
fn test_exists_query_simple() -> crate::Result<()> {
@@ -170,25 +173,11 @@ mod tests {
170173
let reader = index.reader()?;
171174
let searcher = reader.searcher();
172175

173-
let alldocs = ExistsQuery::new_exists_query("all".to_string());
174-
let count = searcher.search(&alldocs, &Count)?;
175-
assert_eq!(count, 100);
176-
177-
let even_docs = ExistsQuery::new_exists_query("even".to_string());
178-
let count = searcher.search(&even_docs, &Count)?;
179-
assert_eq!(count, 50);
180-
181-
let odd_docs = ExistsQuery::new_exists_query("odd".to_string());
182-
let count = searcher.search(&odd_docs, &Count)?;
183-
assert_eq!(count, 50);
184-
185-
let multi_docs = ExistsQuery::new_exists_query("multi".to_string());
186-
let count = searcher.search(&multi_docs, &Count)?;
187-
assert_eq!(count, 10);
188-
189-
let never_docs = ExistsQuery::new_exists_query("never".to_string());
190-
let count = searcher.search(&never_docs, &Count)?;
191-
assert_eq!(count, 0);
176+
assert_eq!(count_existing_fields(&searcher, "all")?, 100);
177+
assert_eq!(count_existing_fields(&searcher, "odd")?, 50);
178+
assert_eq!(count_existing_fields(&searcher, "even")?, 50);
179+
assert_eq!(count_existing_fields(&searcher, "multi")?, 10);
180+
assert_eq!(count_existing_fields(&searcher, "never")?, 0);
192181

193182
Ok(())
194183
}
@@ -215,18 +204,101 @@ mod tests {
215204
let reader = index.reader()?;
216205
let searcher = reader.searcher();
217206

218-
let alldocs = ExistsQuery::new_exists_query("json.all".to_string());
219-
let count = searcher.search(&alldocs, &Count)?;
220-
assert_eq!(count, 100);
207+
assert_eq!(count_existing_fields(&searcher, "json.all")?, 100);
208+
assert_eq!(count_existing_fields(&searcher, "json.even")?, 50);
209+
assert_eq!(count_existing_fields(&searcher, "json.odd")?, 50);
210+
211+
Ok(())
212+
}
213+
214+
#[test]
215+
fn test_exists_query_misc_supported_types() -> crate::Result<()> {
216+
let mut schema_builder = Schema::builder();
217+
let bool = schema_builder.add_bool_field("bool", FAST);
218+
let bytes = schema_builder.add_bytes_field("bytes", FAST);
219+
let date = schema_builder.add_date_field("date", FAST);
220+
let f64 = schema_builder.add_f64_field("f64", FAST);
221+
let ip_addr = schema_builder.add_ip_addr_field("ip_addr", FAST);
222+
let facet = schema_builder.add_facet_field("facet", FacetOptions::default());
223+
let schema = schema_builder.build();
224+
225+
let index = Index::create_in_ram(schema);
226+
{
227+
let mut index_writer = index.writer_for_tests()?;
228+
let now = OffsetDateTime::now_utc().unix_timestamp();
229+
for i in 0u8..100u8 {
230+
if i % 2 == 0 {
231+
let date_val = DateTime::from_utc(OffsetDateTime::from_unix_timestamp(
232+
now + i as i64 * 100,
233+
)?);
234+
index_writer.add_document(
235+
doc!(bool => i % 3 == 0, bytes => vec![i, i + 1, i + 2], date => date_val),
236+
)?;
237+
} else {
238+
let ip_addr_v6 = Ipv6Addr::new(0, 0, 0, 0, 0, 0xffff, 0xc00a, i.into());
239+
index_writer
240+
.add_document(doc!(f64 => i as f64 * 0.5, ip_addr => ip_addr_v6, facet => Facet::from("/facet/foo"), facet => Facet::from("/facet/bar")))?;
241+
}
242+
}
243+
index_writer.commit()?;
244+
}
245+
let reader = index.reader()?;
246+
let searcher = reader.searcher();
247+
248+
assert_eq!(count_existing_fields(&searcher, "bool")?, 50);
249+
assert_eq!(count_existing_fields(&searcher, "bytes")?, 50);
250+
assert_eq!(count_existing_fields(&searcher, "date")?, 50);
251+
assert_eq!(count_existing_fields(&searcher, "f64")?, 50);
252+
assert_eq!(count_existing_fields(&searcher, "ip_addr")?, 50);
253+
assert_eq!(count_existing_fields(&searcher, "facet")?, 50);
254+
255+
Ok(())
256+
}
257+
258+
#[test]
259+
fn test_exists_query_unsupported_types() -> crate::Result<()> {
260+
let mut schema_builder = Schema::builder();
261+
let not_fast = schema_builder.add_text_field("not_fast", TEXT);
262+
let schema = schema_builder.build();
263+
264+
let index = Index::create_in_ram(schema);
265+
{
266+
let mut index_writer = index.writer_for_tests()?;
267+
index_writer.add_document(doc!(
268+
not_fast => "slow",
269+
))?;
270+
index_writer.commit()?;
271+
}
272+
let reader = index.reader()?;
273+
let searcher = reader.searcher();
221274

222-
let even_docs = ExistsQuery::new_exists_query("json.even".to_string());
223-
let count = searcher.search(&even_docs, &Count)?;
224-
assert_eq!(count, 50);
275+
assert_eq!(
276+
searcher
277+
.search(
278+
&ExistsQuery::new_exists_query("not_fast".to_string()),
279+
&Count
280+
)
281+
.unwrap_err()
282+
.to_string(),
283+
"Schema error: 'Field not_fast is not a fast field.'"
284+
);
225285

226-
let odd_docs = ExistsQuery::new_exists_query("json.odd".to_string());
227-
let count = searcher.search(&odd_docs, &Count)?;
228-
assert_eq!(count, 50);
286+
assert_eq!(
287+
searcher
288+
.search(
289+
&ExistsQuery::new_exists_query("does_not_exists".to_string()),
290+
&Count
291+
)
292+
.unwrap_err()
293+
.to_string(),
294+
"The field does not exist: 'does_not_exists'"
295+
);
229296

230297
Ok(())
231298
}
299+
300+
fn count_existing_fields(searcher: &Searcher, field: &str) -> crate::Result<usize> {
301+
let query = ExistsQuery::new_exists_query(field.to_string());
302+
searcher.search(&query, &Count)
303+
}
232304
}

0 commit comments

Comments
 (0)