
Commit 2c50b02

PSeitz and fulmicoton authored
Fix max bucket limit in histogram (#1703)
* Fix max bucket limit in histogram

  The max bucket limit in histogram was broken: some code introduced temporary filtering of buckets, which resulted in an incorrect increment of the bucket count. The provided solution covers more scenarios, but some scenarios remain unhandled (see #1702).

* Apply suggestions from code review

Co-authored-by: Paul Masurel <[email protected]>
1 parent 509adab commit 2c50b02
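
For readers skimming the diff below, here is a minimal standalone sketch of the new bucket-count estimate, assuming a simplified `Bucket` struct and a free function `estimate_num_buckets` (both hypothetical stand-ins for `SegmentHistogramCollector` and its fields, not tantivy's API): buckets at the front and back whose `doc_count` is at or below `min_doc_count` are trimmed before the count is reported to the bucket limit, so temporarily filtered buckets no longer inflate the check.

```rust
// Simplified stand-in for an intermediate histogram bucket (illustrative only).
struct Bucket {
    doc_count: u64,
}

/// Estimate how many buckets will survive `min_doc_count` filtering by trimming
/// the leading and trailing buckets that fall at or below the threshold.
/// This is only a lower bound: after merging segments, a trimmed bucket's
/// doc_count could still end up above `min_doc_count`.
fn estimate_num_buckets(buckets: &[Bucket], min_doc_count: u64) -> usize {
    // Buckets at the front that the min_doc_count filter would drop...
    let cut_off_front = buckets
        .iter()
        .take_while(|bucket| bucket.doc_count <= min_doc_count)
        .count();
    // ...and at the back; only the remainder is scanned so nothing is counted twice.
    let cut_off_back = buckets[cut_off_front..]
        .iter()
        .rev()
        .take_while(|bucket| bucket.doc_count <= min_doc_count)
        .count();
    buckets.len() - cut_off_front - cut_off_back
}

fn main() {
    // Buckets 0..=1 and 7..=9 are empty; with min_doc_count = 0 they are trimmed
    // from both ends, so only the 5 interior buckets count towards the limit.
    let buckets: Vec<Bucket> = (0..10)
        .map(|i| Bucket {
            doc_count: if (2..7).contains(&i) { 1 } else { 0 },
        })
        .collect();
    assert_eq!(estimate_num_buckets(&buckets, 0), 5);
    println!("estimated buckets: {}", estimate_num_buckets(&buckets, 0));
}
```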

File tree

1 file changed (+58 −5)

src/aggregation/bucket/histogram/histogram.rs

+58 −5
@@ -206,6 +206,7 @@ pub struct SegmentHistogramCollector {
     field_type: Type,
     interval: f64,
     offset: f64,
+    min_doc_count: u64,
     first_bucket_num: i64,
     bounds: HistogramBounds,
 }
@@ -215,6 +216,30 @@ impl SegmentHistogramCollector {
         self,
         agg_with_accessor: &BucketAggregationWithAccessor,
     ) -> crate::Result<IntermediateBucketResult> {
+        // Compute the number of buckets to validate against the max number of buckets.
+        // Note: We use min_doc_count here, but it's only a lower bound here, since we are on the
+        // intermediate level and after merging the number of documents of a bucket could exceed
+        // `min_doc_count`.
+        {
+            let cut_off_buckets_front = self
+                .buckets
+                .iter()
+                .take_while(|bucket| bucket.doc_count <= self.min_doc_count)
+                .count();
+            let cut_off_buckets_back = self.buckets[cut_off_buckets_front..]
+                .iter()
+                .rev()
+                .take_while(|bucket| bucket.doc_count <= self.min_doc_count)
+                .count();
+            let estimate_num_buckets =
+                self.buckets.len() - cut_off_buckets_front - cut_off_buckets_back;
+
+            agg_with_accessor
+                .bucket_count
+                .add_count(estimate_num_buckets as u32);
+            agg_with_accessor.bucket_count.validate_bucket_count()?;
+        }
+
         let mut buckets = Vec::with_capacity(
             self.buckets
                 .iter()
@@ -251,11 +276,6 @@ impl SegmentHistogramCollector {
             );
         };
 
-        agg_with_accessor
-            .bucket_count
-            .add_count(buckets.len() as u32);
-        agg_with_accessor.bucket_count.validate_bucket_count()?;
-
         Ok(IntermediateBucketResult::Histogram { buckets })
     }
 
@@ -308,6 +328,7 @@ impl SegmentHistogramCollector {
             first_bucket_num,
             bounds,
             sub_aggregations,
+            min_doc_count: req.min_doc_count(),
         })
     }
 
@@ -1521,4 +1542,36 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn histogram_test_max_buckets_segments() -> crate::Result<()> {
+        let values = vec![0.0, 70000.0];
+
+        let index = get_test_index_from_values(true, &values)?;
+
+        let agg_req: Aggregations = vec![(
+            "my_interval".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
+                    field: "score_f64".to_string(),
+                    interval: 1.0,
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let res = exec_request(agg_req, &index);
+
+        assert_eq!(
+            res.unwrap_err().to_string(),
+            "An invalid argument was passed: 'Aborting aggregation because too many buckets were \
+             created'"
+                .to_string()
+        );
+
+        Ok(())
+    }
 }
