@@ -3,6 +3,8 @@ use std::ops::RangeInclusive;
3
3
4
4
use tantivy_bitpacker:: minmax;
5
5
6
+ use crate :: monotonic_mapping:: StrictlyMonotonicFn ;
7
+
6
8
pub trait Column < T : PartialOrd = u64 > : Send + Sync {
7
9
/// Return the value associated with the given idx.
8
10
///
@@ -143,16 +145,30 @@ struct MonotonicMappingColumn<C, T, Input> {
143
145
_phantom : PhantomData < Input > ,
144
146
}
145
147
146
- /// Creates a view of a column transformed by a monotonic mapping.
147
- pub fn monotonic_map_column < C , T , Input : PartialOrd , Output : PartialOrd > (
148
+ /// Creates a view of a column transformed by a strictly monotonic mapping. See
149
+ /// [`StrictlyMonotonicFn`].
150
+ ///
151
+ /// E.g. when applying a gcd-based mapping: monotonic_mapping([100, 200, 300]) == [1, 2, 3].
152
+ /// monotonic_mapping.mapping() is expected to be injective, and we should always have
153
+ /// monotonic_mapping.inverse(monotonic_mapping.mapping(el)) == el
154
+ ///
155
+ /// The inverse of the mapping is required for:
156
+ /// `fn get_between_vals(&self, range: RangeInclusive<T>) -> Vec<u64>`
157
+ /// The user provides a range of original values, which we need to map monotonically in the same way the
158
+ /// serialization does before calling the underlying column.
159
+ ///
160
+ /// Note that when opening a codec, the monotonic_mapping should be the inverse of the mapping
161
+ /// used during serialization. Consequently, the monotonic_mapping_inv when opening is the same as
162
+ /// monotonic_mapping during serialization.
163
+ pub fn monotonic_map_column < C , T , Input , Output > (
148
164
from_column : C ,
149
165
monotonic_mapping : T ,
150
166
) -> impl Column < Output >
151
167
where
152
168
C : Column < Input > ,
153
- T : Fn ( Input ) -> Output + Send + Sync ,
154
- Input : Send + Sync ,
155
- Output : Send + Sync ,
169
+ T : StrictlyMonotonicFn < Input , Output > + Send + Sync ,
170
+ Input : PartialOrd + Send + Sync + Clone ,
171
+ Output : PartialOrd + Send + Sync + Clone ,
156
172
{
157
173
MonotonicMappingColumn {
158
174
from_column,
@@ -161,36 +177,46 @@ where
161
177
}
162
178
}
163
179
164
- impl < C , T , Input : PartialOrd , Output : PartialOrd > Column < Output >
165
- for MonotonicMappingColumn < C , T , Input >
180
+ impl < C , T , Input , Output > Column < Output > for MonotonicMappingColumn < C , T , Input >
166
181
where
167
182
C : Column < Input > ,
168
- T : Fn ( Input ) -> Output + Send + Sync ,
169
- Input : Send + Sync ,
170
- Output : Send + Sync ,
183
+ T : StrictlyMonotonicFn < Input , Output > + Send + Sync ,
184
+ Input : PartialOrd + Send + Sync + Clone ,
185
+ Output : PartialOrd + Send + Sync + Clone ,
171
186
{
172
187
#[ inline]
173
188
fn get_val ( & self , idx : u64 ) -> Output {
174
189
let from_val = self . from_column . get_val ( idx) ;
175
- ( self . monotonic_mapping ) ( from_val)
190
+ self . monotonic_mapping . mapping ( from_val)
176
191
}
177
192
178
193
fn min_value ( & self ) -> Output {
179
194
let from_min_value = self . from_column . min_value ( ) ;
180
- ( self . monotonic_mapping ) ( from_min_value)
195
+ self . monotonic_mapping . mapping ( from_min_value)
181
196
}
182
197
183
198
fn max_value ( & self ) -> Output {
184
199
let from_max_value = self . from_column . max_value ( ) ;
185
- ( self . monotonic_mapping ) ( from_max_value)
200
+ self . monotonic_mapping . mapping ( from_max_value)
186
201
}
187
202
188
203
fn num_vals ( & self ) -> u64 {
189
204
self . from_column . num_vals ( )
190
205
}
191
206
192
207
fn iter ( & self ) -> Box < dyn Iterator < Item = Output > + ' _ > {
193
- Box :: new ( self . from_column . iter ( ) . map ( & self . monotonic_mapping ) )
208
+ Box :: new (
209
+ self . from_column
210
+ . iter ( )
211
+ . map ( |el| self . monotonic_mapping . mapping ( el) ) ,
212
+ )
213
+ }
214
+
215
+ fn get_between_vals ( & self , range : RangeInclusive < Output > ) -> Vec < u64 > {
216
+ self . from_column . get_between_vals (
217
+ self . monotonic_mapping . inverse ( range. start ( ) . clone ( ) )
218
+ ..=self . monotonic_mapping . inverse ( range. end ( ) . clone ( ) ) ,
219
+ )
194
220
}
195
221
196
222
// We voluntarily do not implement get_range as it yields a regression,
@@ -236,19 +262,22 @@ where
236
262
#[ cfg( test) ]
237
263
mod tests {
238
264
use super :: * ;
239
- use crate :: MonotonicallyMappableToU64 ;
265
+ use crate :: monotonic_mapping:: {
266
+ StrictlyMonotonicMappingInverter , StrictlyMonotonicMappingToInternalBaseval ,
267
+ StrictlyMonotonicMappingToInternalGCDBaseval ,
268
+ } ;
240
269
241
270
#[ test]
242
271
fn test_monotonic_mapping ( ) {
243
- let vals = & [ 1u64 , 3u64 ] [ ..] ;
272
+ let vals = & [ 3u64 , 5u64 ] [ ..] ;
244
273
let col = VecColumn :: from ( vals) ;
245
- let mapped = monotonic_map_column ( col, |el| el + 4 ) ;
246
- assert_eq ! ( mapped. min_value( ) , 5u64 ) ;
247
- assert_eq ! ( mapped. max_value( ) , 7u64 ) ;
274
+ let mapped = monotonic_map_column ( col, StrictlyMonotonicMappingToInternalBaseval :: new ( 2 ) ) ;
275
+ assert_eq ! ( mapped. min_value( ) , 1u64 ) ;
276
+ assert_eq ! ( mapped. max_value( ) , 3u64 ) ;
248
277
assert_eq ! ( mapped. num_vals( ) , 2 ) ;
249
278
assert_eq ! ( mapped. num_vals( ) , 2 ) ;
250
- assert_eq ! ( mapped. get_val( 0 ) , 5 ) ;
251
- assert_eq ! ( mapped. get_val( 1 ) , 7 ) ;
279
+ assert_eq ! ( mapped. get_val( 0 ) , 1 ) ;
280
+ assert_eq ! ( mapped. get_val( 1 ) , 3 ) ;
252
281
}
253
282
254
283
#[ test]
@@ -260,31 +289,42 @@ mod tests {
260
289
261
290
#[ test]
262
291
fn test_monotonic_mapping_iter ( ) {
263
- let vals: Vec < u64 > = ( - 1 .. 99 ) . map ( i64 :: to_u64 ) . collect ( ) ;
292
+ let vals: Vec < u64 > = ( 10 .. 110u64 ) . map ( |el| el * 10 ) . collect ( ) ;
264
293
let col = VecColumn :: from ( & vals) ;
265
- let mapped = monotonic_map_column ( col, |el| i64:: from_u64 ( el) * 10i64 ) ;
266
- let val_i64s: Vec < i64 > = mapped. iter ( ) . collect ( ) ;
294
+ let mapped = monotonic_map_column (
295
+ col,
296
+ StrictlyMonotonicMappingInverter :: from (
297
+ StrictlyMonotonicMappingToInternalGCDBaseval :: new ( 10 , 100 ) ,
298
+ ) ,
299
+ ) ;
300
+ let val_i64s: Vec < u64 > = mapped. iter ( ) . collect ( ) ;
267
301
for i in 0 ..100 {
268
302
assert_eq ! ( val_i64s[ i as usize ] , mapped. get_val( i) ) ;
269
303
}
270
304
}
271
305
272
306
#[ test]
273
307
fn test_monotonic_mapping_get_range ( ) {
274
- let vals: Vec < u64 > = ( - 1 .. 99 ) . map ( i64 :: to_u64 ) . collect ( ) ;
308
+ let vals: Vec < u64 > = ( 0 .. 100u64 ) . map ( |el| el * 10 ) . collect ( ) ;
275
309
let col = VecColumn :: from ( & vals) ;
276
- let mapped = monotonic_map_column ( col, |el| i64:: from_u64 ( el) * 10i64 ) ;
277
- assert_eq ! ( mapped. min_value( ) , -10i64 ) ;
278
- assert_eq ! ( mapped. max_value( ) , 980i64 ) ;
310
+ let mapped = monotonic_map_column (
311
+ col,
312
+ StrictlyMonotonicMappingInverter :: from (
313
+ StrictlyMonotonicMappingToInternalGCDBaseval :: new ( 10 , 0 ) ,
314
+ ) ,
315
+ ) ;
316
+
317
+ assert_eq ! ( mapped. min_value( ) , 0u64 ) ;
318
+ assert_eq ! ( mapped. max_value( ) , 9900u64 ) ;
279
319
assert_eq ! ( mapped. num_vals( ) , 100 ) ;
280
- let val_i64s : Vec < i64 > = mapped. iter ( ) . collect ( ) ;
281
- assert_eq ! ( val_i64s . len( ) , 100 ) ;
320
+ let val_u64s : Vec < u64 > = mapped. iter ( ) . collect ( ) ;
321
+ assert_eq ! ( val_u64s . len( ) , 100 ) ;
282
322
for i in 0 ..100 {
283
- assert_eq ! ( val_i64s [ i as usize ] , mapped. get_val( i) ) ;
284
- assert_eq ! ( val_i64s [ i as usize ] , i64 :: from_u64 ( vals[ i as usize ] ) * 10 ) ;
323
+ assert_eq ! ( val_u64s [ i as usize ] , mapped. get_val( i) ) ;
324
+ assert_eq ! ( val_u64s [ i as usize ] , vals[ i as usize ] * 10 ) ;
285
325
}
286
- let mut buf = [ 0i64 ; 20 ] ;
326
+ let mut buf = [ 0u64 ; 20 ] ;
287
327
mapped. get_range ( 7 , & mut buf[ ..] ) ;
288
- assert_eq ! ( & val_i64s [ 7 ..] [ ..20 ] , & buf) ;
328
+ assert_eq ! ( & val_u64s [ 7 ..] [ ..20 ] , & buf) ;
289
329
}
290
330
}
0 commit comments