@@ -11,7 +11,9 @@ use crate::aggregation::agg_req_with_accessor::{
11
11
use crate :: aggregation:: intermediate_agg_result:: {
12
12
IntermediateBucketResult , IntermediateTermBucketEntry , IntermediateTermBucketResult ,
13
13
} ;
14
- use crate :: aggregation:: segment_agg_result:: SegmentAggregationResultsCollector ;
14
+ use crate :: aggregation:: segment_agg_result:: {
15
+ validate_bucket_count, SegmentAggregationResultsCollector ,
16
+ } ;
15
17
use crate :: error:: DataCorruption ;
16
18
use crate :: fastfield:: MultiValuedFastFieldReader ;
17
19
use crate :: schema:: Type ;
@@ -244,19 +246,23 @@ impl TermBuckets {
244
246
& mut self ,
245
247
term_ids : & [ u64 ] ,
246
248
doc : DocId ,
247
- bucket_with_accessor : & AggregationsWithAccessor ,
249
+ bucket_with_accessor : & BucketAggregationWithAccessor ,
248
250
blueprint : & Option < SegmentAggregationResultsCollector > ,
249
251
) -> crate :: Result < ( ) > {
250
252
for & term_id in term_ids {
251
- let entry = self
252
- . entries
253
- . entry ( term_id as u32 )
254
- . or_insert_with ( || TermBucketEntry :: from_blueprint ( blueprint) ) ;
253
+ let entry = self . entries . entry ( term_id as u32 ) . or_insert_with ( || {
254
+ bucket_with_accessor
255
+ . bucket_count
256
+ . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: Relaxed ) ;
257
+
258
+ TermBucketEntry :: from_blueprint ( blueprint)
259
+ } ) ;
255
260
entry. doc_count += 1 ;
256
261
if let Some ( sub_aggregations) = entry. sub_aggregations . as_mut ( ) {
257
- sub_aggregations. collect ( doc, bucket_with_accessor) ?;
262
+ sub_aggregations. collect ( doc, & bucket_with_accessor. sub_aggregation ) ?;
258
263
}
259
264
}
265
+ validate_bucket_count ( & bucket_with_accessor. bucket_count ) ?;
260
266
Ok ( ( ) )
261
267
}
262
268
@@ -441,25 +447,25 @@ impl SegmentTermCollector {
441
447
self . term_buckets . increment_bucket (
442
448
& vals1,
443
449
docs[ 0 ] ,
444
- & bucket_with_accessor. sub_aggregation ,
450
+ bucket_with_accessor,
445
451
& self . blueprint ,
446
452
) ?;
447
453
self . term_buckets . increment_bucket (
448
454
& vals2,
449
455
docs[ 1 ] ,
450
- & bucket_with_accessor. sub_aggregation ,
456
+ bucket_with_accessor,
451
457
& self . blueprint ,
452
458
) ?;
453
459
self . term_buckets . increment_bucket (
454
460
& vals3,
455
461
docs[ 2 ] ,
456
- & bucket_with_accessor. sub_aggregation ,
462
+ bucket_with_accessor,
457
463
& self . blueprint ,
458
464
) ?;
459
465
self . term_buckets . increment_bucket (
460
466
& vals4,
461
467
docs[ 3 ] ,
462
- & bucket_with_accessor. sub_aggregation ,
468
+ bucket_with_accessor,
463
469
& self . blueprint ,
464
470
) ?;
465
471
}
@@ -469,7 +475,7 @@ impl SegmentTermCollector {
469
475
self . term_buckets . increment_bucket (
470
476
& vals1,
471
477
doc,
472
- & bucket_with_accessor. sub_aggregation ,
478
+ bucket_with_accessor,
473
479
& self . blueprint ,
474
480
) ?;
475
481
}
@@ -1175,6 +1181,33 @@ mod tests {
1175
1181
Ok ( ( ) )
1176
1182
}
1177
1183
1184
/// Runs a terms aggregation on the `string_id` field over an index built from
/// 100_000 distinct terms and expects the request to fail.
///
/// NOTE(review): going by the test name, the failure is presumably the bucket
/// limit being hit while term buckets are created; the test itself only checks
/// that *some* error is returned.
#[test]
fn terms_aggregation_term_bucket_limit() -> crate::Result<()> {
    // One distinct term per number in 0..100_000, all supplied as a single
    // entry of the per-segment term list.
    let terms: Vec<String> = (0..100_000).map(|number| number.to_string()).collect();
    let terms_per_segment = vec![terms.iter().map(String::as_str).collect()];

    let index = get_test_index_from_terms(true, &terms_per_segment)?;

    // Terms aggregation without sub-aggregations; `min_doc_count` is
    // explicitly set to zero, everything else keeps its default.
    let terms_aggregation = TermsAggregation {
        field: "string_id".to_string(),
        min_doc_count: Some(0),
        ..Default::default()
    };
    let bucket_aggregation = BucketAggregation {
        bucket_agg: BucketAggregationType::Terms(terms_aggregation),
        sub_aggregation: Default::default(),
    };
    let agg_req: Aggregations = std::iter::once((
        "my_texts".to_string(),
        Aggregation::Bucket(bucket_aggregation),
    ))
    .collect();

    let outcome = exec_request_with_query(agg_req, &index, None);
    assert!(outcome.is_err());

    Ok(())
}
1210
+
1178
1211
#[ test]
1179
1212
fn test_json_format ( ) -> crate :: Result < ( ) > {
1180
1213
let agg_req: Aggregations = vec ! [ (
0 commit comments