Skip to content

Commit c5c2e59

Browse files
committed
introduce optional collect_block in segmentcollector
Add collect_block in segment_collector to handle groups of documents as a performance optimization. Add collect_block for MultiCollector.
1 parent 44ea731 commit c5c2e59

File tree

6 files changed

+89
-5
lines changed

6 files changed

+89
-5
lines changed

src/aggregation/bucket/term_agg.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ impl TermBuckets {
256256
});
257257
entry.doc_count += 1;
258258
if let Some(sub_aggregations) = entry.sub_aggregations.as_mut() {
259-
sub_aggregations.collect(doc, &sub_aggregation)?;
259+
sub_aggregations.collect(doc, sub_aggregation)?;
260260
}
261261
}
262262
bucket_count.validate_bucket_count()?;

src/collector/count_collector.rs

+5
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ impl SegmentCollector for SegmentCountCollector {
7070
Ok(())
7171
}
7272

73+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
74+
self.count += docs.len();
75+
Ok(())
76+
}
77+
7378
fn harvest(self) -> usize {
7479
self.count
7580
}

src/collector/custom_score_top_collector.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ pub(crate) struct CustomScoreTopCollector<TCustomScorer, TScore = Score> {
88
}
99

1010
impl<TCustomScorer, TScore> CustomScoreTopCollector<TCustomScorer, TScore>
11-
where TScore: Clone + PartialOrd
11+
where
12+
TScore: Clone + PartialOrd,
1213
{
1314
pub(crate) fn new(
1415
custom_scorer: TCustomScorer,
@@ -96,6 +97,14 @@ where
9697
Ok(())
9798
}
9899

100+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
101+
for (doc, _score) in docs {
102+
let score = self.segment_scorer.score(*doc);
103+
self.segment_collector.collect(*doc, score);
104+
}
105+
Ok(())
106+
}
107+
99108
fn harvest(self) -> Vec<(TScore, DocAddress)> {
100109
self.segment_collector.harvest()
101110
}
@@ -114,7 +123,8 @@ where
114123
}
115124

116125
impl<F, TScore> CustomSegmentScorer<TScore> for F
117-
where F: 'static + FnMut(DocId) -> TScore
126+
where
127+
F: 'static + FnMut(DocId) -> TScore,
118128
{
119129
fn score(&mut self, doc: DocId) -> TScore {
120130
(self)(doc)

src/collector/mod.rs

+45-2
Original file line numberDiff line numberDiff line change
@@ -172,19 +172,33 @@ pub trait Collector: Sync + Send {
172172
) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
173173
let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
174174

175+
let mut cache_pos = 0;
176+
let mut cache = [(0, 0.0); 64];
177+
175178
if let Some(alive_bitset) = reader.alive_bitset() {
176179
weight.for_each(reader, &mut |doc, score| {
177180
if alive_bitset.is_alive(doc) {
178-
segment_collector.collect(doc, score)?;
181+
cache[cache_pos] = (doc, score);
182+
cache_pos += 1;
183+
if cache_pos == 64 {
184+
segment_collector.collect_block(&cache)?;
185+
cache_pos = 0;
186+
}
179187
}
180188
Ok(())
181189
})?;
182190
} else {
183191
weight.for_each(reader, &mut |doc, score| {
184-
segment_collector.collect(doc, score)?;
192+
cache[cache_pos] = (doc, score);
193+
cache_pos += 1;
194+
if cache_pos == 64 {
195+
segment_collector.collect_block(&cache)?;
196+
cache_pos = 0;
197+
}
185198
Ok(())
186199
})?;
187200
}
201+
segment_collector.collect_block(&cache[..cache_pos])?;
188202
Ok(segment_collector.harvest())
189203
}
190204
}
@@ -258,6 +272,14 @@ pub trait SegmentCollector: 'static {
258272
/// The query pushes the scored document to the collector via this method.
259273
fn collect(&mut self, doc: DocId, score: Score) -> crate::Result<()>;
260274

275+
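/// The query pushes a block of scored documents to the collector via this method. The default implementation calls `collect` once for each `(doc, score)` pair in `docs`, in order.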
276+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
277+
for (doc, score) in docs {
278+
self.collect(*doc, *score)?;
279+
}
280+
Ok(())
281+
}
282+
261283
/// Extract the fruit of the collection from the `SegmentCollector`.
262284
fn harvest(self) -> Self::Fruit;
263285
}
@@ -317,6 +339,12 @@ where
317339
Ok(())
318340
}
319341

342+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
343+
self.0.collect_block(docs)?;
344+
self.1.collect_block(docs)?;
345+
Ok(())
346+
}
347+
320348
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
321349
(self.0.harvest(), self.1.harvest())
322350
}
@@ -383,6 +411,13 @@ where
383411
Ok(())
384412
}
385413

414+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
415+
self.0.collect_block(docs)?;
416+
self.1.collect_block(docs)?;
417+
self.2.collect_block(docs)?;
418+
Ok(())
419+
}
420+
386421
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
387422
(self.0.harvest(), self.1.harvest(), self.2.harvest())
388423
}
@@ -459,6 +494,14 @@ where
459494
Ok(())
460495
}
461496

497+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
498+
self.0.collect_block(docs)?;
499+
self.1.collect_block(docs)?;
500+
self.2.collect_block(docs)?;
501+
self.3.collect_block(docs)?;
502+
Ok(())
503+
}
504+
462505
fn harvest(self) -> <Self as SegmentCollector>::Fruit {
463506
(
464507
self.0.harvest(),

src/collector/multi_collector.rs

+18
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,19 @@ impl SegmentCollector for Box<dyn BoxableSegmentCollector> {
5757
Ok(())
5858
}
5959

60+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
61+
self.as_mut().collect_block(docs)?;
62+
Ok(())
63+
}
64+
6065
fn harvest(self) -> Box<dyn Fruit> {
6166
BoxableSegmentCollector::harvest_from_box(self)
6267
}
6368
}
6469

6570
pub trait BoxableSegmentCollector {
6671
fn collect(&mut self, doc: u32, score: Score) -> crate::Result<()>;
72+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()>;
6773
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit>;
6874
}
6975

@@ -76,6 +82,11 @@ impl<TSegmentCollector: SegmentCollector> BoxableSegmentCollector
7682
self.0.collect(doc, score)
7783
}
7884

85+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
86+
self.0.collect_block(docs)?;
87+
Ok(())
88+
}
89+
7990
fn harvest_from_box(self: Box<Self>) -> Box<dyn Fruit> {
8091
Box::new(self.0.harvest())
8192
}
@@ -236,6 +247,13 @@ impl SegmentCollector for MultiCollectorChild {
236247
Ok(())
237248
}
238249

250+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
251+
for child in &mut self.children {
252+
child.collect_block(docs)?;
253+
}
254+
Ok(())
255+
}
256+
239257
fn harvest(self) -> MultiFruit {
240258
MultiFruit {
241259
sub_fruits: self

src/collector/top_score_collector.rs

+8
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,14 @@ impl SegmentCollector for TopScoreSegmentCollector {
704704
Ok(())
705705
}
706706

707+
#[inline]
708+
fn collect_block(&mut self, docs: &[(DocId, Score)]) -> crate::Result<()> {
709+
for (doc, score) in docs {
710+
self.0.collect(*doc, *score);
711+
}
712+
Ok(())
713+
}
714+
707715
fn harvest(self) -> Vec<(Score, DocAddress)> {
708716
self.0.harvest()
709717
}

0 commit comments

Comments
 (0)