@@ -105,8 +105,14 @@ impl Recorder for DocIdRecorder {
105
105
106
106
#[ inline]
107
107
fn new_doc ( & mut self , doc : DocId , arena : & mut MemoryArena ) {
108
+ let delta = if self . current_doc == u32:: MAX {
109
+ doc
110
+ } else {
111
+ doc - self . current_doc
112
+ } ;
113
+
108
114
self . current_doc = doc;
109
- self . stack . writer ( arena) . write_u32_vint ( doc ) ;
115
+ self . stack . writer ( arena) . write_u32_vint ( delta ) ;
110
116
}
111
117
112
118
#[ inline]
@@ -125,18 +131,22 @@ impl Recorder for DocIdRecorder {
125
131
let ( buffer, doc_ids) = buffer_lender. lend_all ( ) ;
126
132
self . stack . read_to_end ( arena, buffer) ;
127
133
// TODO avoid reading twice.
134
+ let mut prev_doc = 0 ;
128
135
if let Some ( doc_id_map) = doc_id_map {
129
- doc_ids. extend (
130
- VInt32Reader :: new ( & buffer[ ..] )
131
- . map ( |old_doc_id| doc_id_map. get_new_doc_id ( old_doc_id) ) ,
132
- ) ;
136
+ doc_ids. extend ( VInt32Reader :: new ( & buffer[ ..] ) . map ( |delta_doc_id| {
137
+ let old_doc_id = prev_doc + delta_doc_id;
138
+ prev_doc = old_doc_id;
139
+ doc_id_map. get_new_doc_id ( old_doc_id)
140
+ } ) ) ;
133
141
doc_ids. sort_unstable ( ) ;
134
142
135
143
for doc in doc_ids {
136
144
serializer. write_doc ( * doc, 0u32 , & [ ] [ ..] ) ;
137
145
}
138
146
} else {
139
- for doc in VInt32Reader :: new ( & buffer[ ..] ) {
147
+ for delta_doc_id in VInt32Reader :: new ( & buffer[ ..] ) {
148
+ let doc = prev_doc + delta_doc_id;
149
+ prev_doc = doc;
140
150
serializer. write_doc ( doc, 0u32 , & [ ] [ ..] ) ;
141
151
}
142
152
}
@@ -164,9 +174,14 @@ impl Recorder for TermFrequencyRecorder {
164
174
165
175
#[ inline]
166
176
fn new_doc ( & mut self , doc : DocId , arena : & mut MemoryArena ) {
177
+ let delta = if self . current_doc == u32:: MAX {
178
+ doc
179
+ } else {
180
+ doc - self . current_doc
181
+ } ;
167
182
self . term_doc_freq += 1 ;
168
183
self . current_doc = doc;
169
- self . stack . writer ( arena) . write_u32_vint ( doc ) ;
184
+ self . stack . writer ( arena) . write_u32_vint ( delta ) ;
170
185
}
171
186
172
187
#[ inline]
@@ -191,21 +206,26 @@ impl Recorder for TermFrequencyRecorder {
191
206
let buffer = buffer_lender. lend_u8 ( ) ;
192
207
self . stack . read_to_end ( arena, buffer) ;
193
208
let mut u32_it = VInt32Reader :: new ( & buffer[ ..] ) ;
209
+ let mut prev_doc = 0 ;
194
210
if let Some ( doc_id_map) = doc_id_map {
195
211
let mut doc_id_and_tf = vec ! [ ] ;
196
- while let Some ( old_doc_id) = u32_it. next ( ) {
212
+ while let Some ( delta_doc_id) = u32_it. next ( ) {
213
+ let doc_id = prev_doc + delta_doc_id;
214
+ prev_doc = doc_id;
197
215
let term_freq = u32_it. next ( ) . unwrap_or ( self . current_tf ) ;
198
- doc_id_and_tf. push ( ( doc_id_map. get_new_doc_id ( old_doc_id ) , term_freq) ) ;
216
+ doc_id_and_tf. push ( ( doc_id_map. get_new_doc_id ( doc_id ) , term_freq) ) ;
199
217
}
200
218
doc_id_and_tf. sort_unstable_by_key ( |& ( doc_id, _) | doc_id) ;
201
219
202
220
for ( doc_id, tf) in doc_id_and_tf {
203
221
serializer. write_doc ( doc_id, tf, & [ ] [ ..] ) ;
204
222
}
205
223
} else {
206
- while let Some ( doc) = u32_it. next ( ) {
224
+ while let Some ( delta_doc_id) = u32_it. next ( ) {
225
+ let doc_id = prev_doc + delta_doc_id;
226
+ prev_doc = doc_id;
207
227
let term_freq = u32_it. next ( ) . unwrap_or ( self . current_tf ) ;
208
- serializer. write_doc ( doc , term_freq, & [ ] [ ..] ) ;
228
+ serializer. write_doc ( doc_id , term_freq, & [ ] [ ..] ) ;
209
229
}
210
230
}
211
231
}
@@ -241,9 +261,14 @@ impl Recorder for TfAndPositionRecorder {
241
261
242
262
#[ inline]
243
263
fn new_doc ( & mut self , doc : DocId , arena : & mut MemoryArena ) {
264
+ let delta = if self . current_doc == u32:: MAX {
265
+ doc
266
+ } else {
267
+ doc - self . current_doc
268
+ } ;
244
269
self . current_doc = doc;
245
270
self . term_doc_freq += 1u32 ;
246
- self . stack . writer ( arena) . write_u32_vint ( doc ) ;
271
+ self . stack . writer ( arena) . write_u32_vint ( delta ) ;
247
272
}
248
273
249
274
#[ inline]
@@ -269,7 +294,10 @@ impl Recorder for TfAndPositionRecorder {
269
294
self . stack . read_to_end ( arena, buffer_u8) ;
270
295
let mut u32_it = VInt32Reader :: new ( & buffer_u8[ ..] ) ;
271
296
let mut doc_id_and_positions = vec ! [ ] ;
272
- while let Some ( doc) = u32_it. next ( ) {
297
+ let mut prev_doc = 0 ;
298
+ while let Some ( delta_doc_id) = u32_it. next ( ) {
299
+ let doc_id = prev_doc + delta_doc_id;
300
+ prev_doc = doc_id;
273
301
let mut prev_position_plus_one = 1u32 ;
274
302
buffer_positions. clear ( ) ;
275
303
loop {
@@ -287,9 +315,9 @@ impl Recorder for TfAndPositionRecorder {
287
315
if let Some ( doc_id_map) = doc_id_map {
288
316
// this simple variant to remap may consume to much memory
289
317
doc_id_and_positions
290
- . push ( ( doc_id_map. get_new_doc_id ( doc ) , buffer_positions. to_vec ( ) ) ) ;
318
+ . push ( ( doc_id_map. get_new_doc_id ( doc_id ) , buffer_positions. to_vec ( ) ) ) ;
291
319
} else {
292
- serializer. write_doc ( doc , buffer_positions. len ( ) as u32 , buffer_positions) ;
320
+ serializer. write_doc ( doc_id , buffer_positions. len ( ) as u32 , buffer_positions) ;
293
321
}
294
322
}
295
323
if doc_id_map. is_some ( ) {
0 commit comments