Skip to content

Commit bb3eec2

Browse files
committed
support escaped dot, add agg test (#2250)
Add an aggregation test for nested JSON; allow escaping of dots in field paths.
1 parent 8baae1b commit bb3eec2

File tree

4 files changed

+75
-1
lines changed

4 files changed

+75
-1
lines changed

src/aggregation/agg_tests.rs

+59
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,65 @@ fn test_aggregation_on_json_object() {
624624
);
625625
}
626626

627+
// Checks that terms aggregations can address a JSON fast field when both the field name
// ("json.blub") and one of the JSON keys ("color.dot") contain a literal dot. In the
// aggregation request those literal dots are written escaped as `\.`, while unescaped
// dots separate path segments.
#[test]
628+
fn test_aggregation_on_nested_json_object() {
629+
let mut schema_builder = Schema::builder();
630+
// The field name itself contains a dot.
let json = schema_builder.add_json_field("json.blub", FAST);
631+
let schema = schema_builder.build();
632+
let index = Index::create_in_ram(schema);
633+
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
634+
// Each document carries the same value twice: once under the flat key "color.dot"
// (a dot inside the key) and once under the nested object path color -> nested.
index_writer
635+
.add_document(doc!(json => json!({"color.dot": "red", "color": {"nested":"red"} })))
636+
.unwrap();
637+
index_writer
638+
.add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
639+
.unwrap();
640+
index_writer.commit().unwrap();
641+
let reader = index.reader().unwrap();
642+
let searcher = reader.searcher();
643+
644+
// "jsonagg1" targets the flat key "color.dot" (dot escaped); "jsonagg2" targets the
// nested path color.nested (dot left unescaped). Both escape the dot in the field name.
let agg: Aggregations = serde_json::from_value(json!({
645+
"jsonagg1": {
646+
"terms": {
647+
"field": "json\\.blub.color\\.dot",
648+
}
649+
},
650+
"jsonagg2": {
651+
"terms": {
652+
"field": "json\\.blub.color.nested",
653+
}
654+
}
655+
656+
}))
657+
.unwrap();
658+
659+
let aggregation_collector = get_collector(agg);
660+
let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
661+
let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
// Both aggregations are expected to yield one bucket per color, each with a single document.
662+
assert_eq!(
663+
&aggregation_res_json,
664+
&serde_json::json!({
665+
"jsonagg1": {
666+
"buckets": [
667+
{"doc_count": 1, "key": "blue"},
668+
{"doc_count": 1, "key": "red"}
669+
],
670+
"doc_count_error_upper_bound": 0,
671+
"sum_other_doc_count": 0
672+
},
673+
"jsonagg2": {
674+
"buckets": [
675+
{"doc_count": 1, "key": "blue"},
676+
{"doc_count": 1, "key": "red"}
677+
],
678+
"doc_count_error_upper_bound": 0,
679+
"sum_other_doc_count": 0
680+
}
681+
682+
})
683+
);
684+
}
685+
627686
#[test]
628687
fn test_aggregation_on_json_object_empty_columns() {
629688
let mut schema_builder = Schema::builder();

src/core/json_utils.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ pub struct JsonTermWriter<'a> {
320320
/// In other words,
321321
/// - `k8s.node` ends up as `["k8s", "node"]`.
322322
/// - `k8s\.node` ends up as `["k8s.node"]`.
323-
fn split_json_path(json_path: &str) -> Vec<String> {
323+
pub fn split_json_path(json_path: &str) -> Vec<String> {
324324
let mut escaped_state: bool = false;
325325
let mut json_path_segments = Vec::new();
326326
let mut buffer = String::new();

src/fastfield/mod.rs

+7
Original file line numberDiff line numberDiff line change
@@ -1288,11 +1288,18 @@ mod tests {
12881288
index_writer.commit().unwrap();
12891289
let searcher = index.reader().unwrap().searcher();
12901290
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
1291+
// Looking up the column with an unescaped path is supported for now, but may be dropped in the future.
12911292
let column = fast_field_reader
12921293
.column_opt::<i64>("jsonfield.attr.age")
12931294
.unwrap()
12941295
.unwrap();
12951296
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
12961297
assert_eq!(&vals, &[33]);
1298+
let column = fast_field_reader
1299+
.column_opt::<i64>("jsonfield\\.attr.age")
1300+
.unwrap()
1301+
.unwrap();
1302+
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
1303+
assert_eq!(&vals, &[33]);
12971304
}
12981305
}

src/schema/schema.rs

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
88

99
use super::ip_options::IpAddrOptions;
1010
use super::*;
11+
use crate::json_utils::split_json_path;
1112
use crate::schema::bytes_options::BytesOptions;
1213
use crate::TantivyError;
1314

@@ -328,12 +329,19 @@ impl Schema {
328329
if let Some(field) = self.0.fields_map.get(full_path) {
329330
return Some((*field, ""));
330331
}
332+
331333
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
332334
while let Some(pos) = splitting_period_pos.pop() {
333335
let (prefix, suffix) = full_path.split_at(pos);
336+
334337
if let Some(field) = self.0.fields_map.get(prefix) {
335338
return Some((*field, &suffix[1..]));
336339
}
340+
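// The field name may itself contain a dot, written escaped as `\.` in the path. For now
// we try both variants to find the field: the raw prefix above and the unescaped one here.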
341+
let prefix = split_json_path(prefix).join(".");
342+
if let Some(field) = self.0.fields_map.get(&prefix) {
343+
return Some((*field, &suffix[1..]));
344+
}
337345
}
338346
None
339347
}

0 commit comments

Comments
 (0)