Commit 6239697

PSeitz and fulmicoton authored
switch to ms in histogram for date type (#2045)
* switch to ms in histogram for date type

  Switch to ms in the histogram by adding a normalization step that converts
  to nanoseconds precision when creating the collector.

  closes #2028
  related to #2026

* add missing unit long variants
* use single thread to avoid handling test case
* fix docs
* revert CI
* cleanup
* improve docs
* Update src/aggregation/bucket/histogram/histogram.rs

Co-authored-by: Paul Masurel <[email protected]>
1 parent 62709b8 commit 6239697
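The normalization step named in the commit message can be sketched as follows. This is an illustrative reconstruction, not the actual code: the real change lives in src/aggregation/bucket/histogram/histogram.rs, which is among the 10 changed files but not shown in this excerpt. The idea, per the message and the tests below, is that the user-facing request is now in milliseconds while the date fast field stores nanoseconds, so the request parameters are scaled up when the collector is created (and bucket keys are reported back in milliseconds). All names here are hypothetical.

```rust
// Hypothetical sketch of the ms -> ns normalization at collector creation.
#[derive(Debug, PartialEq)]
struct HistogramParams {
    interval: f64, // provided by the user in milliseconds for date fields
    offset: f64,
}

const MS_TO_NS: f64 = 1_000_000.0;

// When the collector is created for a DateTime column, scale the ms-based
// request to the nanosecond precision of the underlying fast field.
fn normalize_for_date(params: HistogramParams, is_date_time: bool) -> HistogramParams {
    if !is_date_time {
        return params;
    }
    HistogramParams {
        interval: params.interval * MS_TO_NS,
        offset: params.offset * MS_TO_NS,
    }
}

fn main() {
    // "1d" parses to 86_400_000 ms; internally the histogram buckets over ns.
    let normalized = normalize_for_date(
        HistogramParams { interval: 86_400_000.0, offset: 0.0 },
        true,
    );
    assert_eq!(normalized.interval, 86_400_000_000_000.0);
}
```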

File tree

10 files changed: +144 −117 lines changed

columnar/src/columnar/column_type.rs (+3)

```diff
@@ -54,6 +54,9 @@ impl ColumnType {
     pub fn to_code(self) -> u8 {
         self as u8
     }
+    pub fn is_date_time(&self) -> bool {
+        self == &ColumnType::DateTime
+    }
 
     pub(crate) fn try_from_code(code: u8) -> Result<ColumnType, InvalidData> {
         COLUMN_TYPES.get(code as usize).copied().ok_or(InvalidData)
```

src/aggregation/agg_req.rs (+1 −1)

```diff
@@ -39,7 +39,7 @@ use super::metric::{
 };
 
 /// The top-level aggregation request structure, which contains [`Aggregation`] and their user
-/// defined names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
+/// defined names. It is also used in bucket aggregations to define sub-aggregations.
 ///
 /// The key is the user defined name of the aggregation.
 pub type Aggregations = HashMap<String, Aggregation>;
```
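Since `Aggregations` is a plain `HashMap<String, Aggregation>`, a request is typically built by deserializing a JSON object whose top-level keys are the user-defined names. A small sketch, assuming tantivy's serde-based request format; the `sales_over_time` name and the exact JSON shape are illustrative:

```rust
use serde_json::json;
use tantivy::aggregation::agg_req::Aggregations;

fn main() -> serde_json::Result<()> {
    // The top-level key is the user-defined aggregation name; the nested
    // object picks the aggregation kind, here a date histogram in "1d" steps.
    let agg_req: Aggregations = serde_json::from_value(json!({
        "sales_over_time": {
            "date_histogram": { "field": "date", "fixed_interval": "1d" }
        }
    }))?;
    assert!(agg_req.contains_key("sales_over_time"));
    Ok(())
}
```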

src/aggregation/agg_req_with_accessor.rs (+4 −2)

```diff
@@ -32,12 +32,13 @@ impl AggregationsWithAccessor {
 
 pub struct AggregationWithAccessor {
     /// In general there can be buckets without fast field access, e.g. buckets that are created
-    /// based on search terms. So eventually this needs to be Option or moved.
+    /// based on search terms. That is not the case currently, but eventually this needs to be
+    /// Option or moved.
     pub(crate) accessor: Column<u64>,
     pub(crate) str_dict_column: Option<StrColumn>,
     pub(crate) field_type: ColumnType,
     /// In case there are multiple types of fast fields, e.g. string and numeric.
-    /// Only used for term aggregations
+    /// Only used for term aggregations currently.
     pub(crate) accessor2: Option<(Column<u64>, ColumnType)>,
     pub(crate) sub_aggregation: AggregationsWithAccessor,
     pub(crate) limits: ResourceLimitGuard,
@@ -105,6 +106,7 @@ impl AggregationWithAccessor {
                 (accessor, field_type)
             }
         };
+
         let sub_aggregation = sub_aggregation.clone();
         Ok(AggregationWithAccessor {
             accessor,
```

src/aggregation/bucket/histogram/date_histogram.rs (+65 −56)

```diff
@@ -67,6 +67,13 @@ pub struct DateHistogramAggregationReq {
     pub fixed_interval: Option<String>,
     /// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
     /// 1))`.
+    ///
+    /// Offset makes it possible to shift this grid into
+    /// `[offset + interval * k, offset + interval * (k + 1))`. Offset has to be in the range [0,
+    /// interval).
+    ///
+    /// The `offset` parameter has the same syntax as the `fixed_interval` parameter, but
+    /// also allows for negative values.
     pub offset: Option<String>,
     /// The minimum number of documents in a bucket to be returned. Defaults to 0.
     pub min_doc_count: Option<u64>,
```
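The grid arithmetic documented above can be made concrete with a small worked example. The `bucket_key` helper below is hypothetical, but it mirrors the documented `[offset + interval * k, offset + interval * (k + 1))` grid; values are in milliseconds now that the request is ms-based:

```rust
// Hypothetical helper mirroring the documented bucket grid: a value falls
// into the bucket [offset + interval * k, offset + interval * (k + 1)).
fn bucket_key(value: f64, interval: f64, offset: f64) -> f64 {
    offset + interval * ((value - offset) / interval).floor()
}

fn main() {
    let day_ms = 86_400_000.0; // fixed_interval: "1d"
    let offset_ms = 21_600_000.0; // offset: "6h"
    // 2015-01-01T12:00:00Z = 1_420_113_600_000 ms lands in the bucket that
    // starts at 2015-01-01T06:00:00Z = 1_420_092_000_000 ms.
    assert_eq!(
        bucket_key(1_420_113_600_000.0, day_ms, offset_ms),
        1_420_092_000_000.0
    );
}
```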
```diff
@@ -77,7 +84,7 @@ pub struct DateHistogramAggregationReq {
     /// hard_bounds only limits the buckets, to force a range set both extended_bounds and
     /// hard_bounds to the same range.
     ///
-    /// Needs to be provided as timestamp in nanosecond precision.
+    /// Needs to be provided as timestamp in millisecond precision.
     ///
     /// ## Example
     /// ```json
@@ -88,7 +95,7 @@ pub struct DateHistogramAggregationReq {
     ///       "interval": "1d",
     ///       "hard_bounds": {
     ///         "min": 0,
-    ///         "max": 1420502400000000000
+    ///         "max": 1420502400000
     ///       }
     ///     }
     ///   }
@@ -114,11 +121,11 @@ impl DateHistogramAggregationReq {
         self.validate()?;
         Ok(HistogramAggregation {
             field: self.field.to_string(),
-            interval: parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())? as f64,
+            interval: parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())? as f64,
             offset: self
                 .offset
                 .as_ref()
-                .map(|offset| parse_offset_into_nanosecs(offset))
+                .map(|offset| parse_offset_into_milliseconds(offset))
                 .transpose()?
                 .map(|el| el as f64),
             min_doc_count: self.min_doc_count,
@@ -153,7 +160,7 @@ impl DateHistogramAggregationReq {
             ));
         }
 
-        parse_into_nanoseconds(self.fixed_interval.as_ref().unwrap())?;
+        parse_into_milliseconds(self.fixed_interval.as_ref().unwrap())?;
 
         Ok(())
     }
@@ -179,7 +186,7 @@ pub enum DateHistogramParseError {
     OutOfBounds(String),
 }
 
-fn parse_offset_into_nanosecs(input: &str) -> Result<i64, AggregationError> {
+fn parse_offset_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
     let is_sign = |byte| &[byte] == b"-" || &[byte] == b"+";
     if input.is_empty() {
         return Err(DateHistogramParseError::InvalidOffset(input.to_string()).into());
@@ -188,18 +195,18 @@ fn parse_offset_into_nanosecs(input: &str) -> Result<i64, AggregationError> {
     let has_sign = is_sign(input.as_bytes()[0]);
     if has_sign {
         let (sign, input) = input.split_at(1);
-        let val = parse_into_nanoseconds(input)?;
+        let val = parse_into_milliseconds(input)?;
         if sign == "-" {
             Ok(-val)
         } else {
             Ok(val)
         }
     } else {
-        parse_into_nanoseconds(input)
+        parse_into_milliseconds(input)
     }
 }
 
-fn parse_into_nanoseconds(input: &str) -> Result<i64, AggregationError> {
+fn parse_into_milliseconds(input: &str) -> Result<i64, AggregationError> {
     let split_boundary = input
         .as_bytes()
         .iter()
```
```diff
@@ -218,17 +225,18 @@ fn parse_into_nanoseconds(input: &str) -> Result<i64, AggregationError> {
         // here and being defensive does not hurt.
         .map_err(|_err| DateHistogramParseError::NumberMissing(input.to_string()))?;
 
-    let multiplier_from_unit = match unit {
-        "ms" => 1,
-        "s" => 1000,
-        "m" => 60 * 1000,
-        "h" => 60 * 60 * 1000,
-        "d" => 24 * 60 * 60 * 1000,
+    let unit_in_ms = match unit {
+        "ms" | "milliseconds" => 1,
+        "s" | "seconds" => 1000,
+        "m" | "minutes" => 60 * 1000,
+        "h" | "hours" => 60 * 60 * 1000,
+        "d" | "days" => 24 * 60 * 60 * 1000,
         _ => return Err(DateHistogramParseError::UnitNotRecognized(unit.to_string()).into()),
     };
 
-    let val = (number * multiplier_from_unit)
-        .checked_mul(1_000_000)
+    let val = number * unit_in_ms;
+    // The field type is in nanoseconds precision, so validate the value to fit the range
+    val.checked_mul(1_000_000)
         .ok_or_else(|| DateHistogramParseError::OutOfBounds(input.to_string()))?;
 
     Ok(val)
```
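To see the new parsing rules end to end, here is a self-contained sketch of the unit table and the nanosecond range check from the hunk above. It is a standalone reimplementation for illustration, not the crate's private functions:

```rust
// Standalone illustration of the unit table and overflow check above.
fn unit_in_ms(unit: &str) -> Option<i64> {
    Some(match unit {
        "ms" | "milliseconds" => 1,
        "s" | "seconds" => 1_000,
        "m" | "minutes" => 60 * 1_000,
        "h" | "hours" => 60 * 60 * 1_000,
        "d" | "days" => 24 * 60 * 60 * 1_000,
        _ => return None,
    })
}

fn main() {
    // Long unit variants parse like the short ones: "2minutes" -> 120_000 ms.
    let val = 2 * unit_in_ms("minutes").unwrap();
    assert_eq!(val, 120_000);
    // The ms value must still fit in i64 once scaled to nanoseconds
    // (x 1_000_000), since the date fast field is nanosecond-precision.
    assert!(val.checked_mul(1_000_000).is_some());
}
```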
```diff
@@ -246,49 +254,50 @@ mod tests {
     use crate::Index;
 
     #[test]
-    fn test_parse_into_nanosecs() {
-        assert_eq!(parse_into_nanoseconds("1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_into_nanoseconds("2m").unwrap(), 120_000_000_000);
+    fn test_parse_into_millisecs() {
+        assert_eq!(parse_into_milliseconds("1m").unwrap(), 60_000);
+        assert_eq!(parse_into_milliseconds("2m").unwrap(), 120_000);
+        assert_eq!(parse_into_milliseconds("2minutes").unwrap(), 120_000);
         assert_eq!(
-            parse_into_nanoseconds("2y").unwrap_err(),
+            parse_into_milliseconds("2y").unwrap_err(),
             DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
         );
         assert_eq!(
-            parse_into_nanoseconds("2000").unwrap_err(),
+            parse_into_milliseconds("2000").unwrap_err(),
             DateHistogramParseError::UnitMissing("2000".to_string()).into()
         );
         assert_eq!(
-            parse_into_nanoseconds("ms").unwrap_err(),
+            parse_into_milliseconds("ms").unwrap_err(),
             DateHistogramParseError::NumberMissing("ms".to_string()).into()
         );
     }
 
     #[test]
-    fn test_parse_offset_into_nanosecs() {
-        assert_eq!(parse_offset_into_nanosecs("1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("+1m").unwrap(), 60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-1m").unwrap(), -60_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("2m").unwrap(), 120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("+2m").unwrap(), 120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-2m").unwrap(), -120_000_000_000);
-        assert_eq!(parse_offset_into_nanosecs("-2ms").unwrap(), -2_000_000);
+    fn test_parse_offset_into_milliseconds() {
+        assert_eq!(parse_offset_into_milliseconds("1m").unwrap(), 60_000);
+        assert_eq!(parse_offset_into_milliseconds("+1m").unwrap(), 60_000);
+        assert_eq!(parse_offset_into_milliseconds("-1m").unwrap(), -60_000);
+        assert_eq!(parse_offset_into_milliseconds("2m").unwrap(), 120_000);
+        assert_eq!(parse_offset_into_milliseconds("+2m").unwrap(), 120_000);
+        assert_eq!(parse_offset_into_milliseconds("-2m").unwrap(), -120_000);
+        assert_eq!(parse_offset_into_milliseconds("-2ms").unwrap(), -2);
         assert_eq!(
-            parse_offset_into_nanosecs("2y").unwrap_err(),
+            parse_offset_into_milliseconds("2y").unwrap_err(),
             DateHistogramParseError::UnitNotRecognized("y".to_string()).into()
         );
         assert_eq!(
-            parse_offset_into_nanosecs("2000").unwrap_err(),
+            parse_offset_into_milliseconds("2000").unwrap_err(),
             DateHistogramParseError::UnitMissing("2000".to_string()).into()
         );
         assert_eq!(
-            parse_offset_into_nanosecs("ms").unwrap_err(),
+            parse_offset_into_milliseconds("ms").unwrap_err(),
             DateHistogramParseError::NumberMissing("ms".to_string()).into()
         );
     }
 
     #[test]
     fn test_parse_into_milliseconds_do_not_accept_non_ascii() {
-        assert!(parse_into_nanoseconds("1ｍ").is_err());
+        assert!(parse_into_milliseconds("1ｍ").is_err());
     }
 
     pub fn get_test_index_from_docs(
```
```diff
@@ -369,7 +378,7 @@ mod tests {
             "buckets" : [
                 {
                     "key_as_string" : "2015-01-01T00:00:00Z",
-                    "key" : 1420070400000000000.0,
+                    "key" : 1420070400000.0,
                     "doc_count" : 4
                 }
             ]
```
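Note that the expected bucket `key` values are now epoch milliseconds rather than nanoseconds. A quick sanity check of the first key, as plain arithmetic outside the test suite:

```rust
fn main() {
    // 1_420_070_400_000 ms since epoch is exactly 16_436 whole days,
    // i.e. 2015-01-01T00:00:00Z (midnight UTC, since there is no remainder).
    let key_ms: i64 = 1_420_070_400_000;
    assert_eq!(key_ms / 86_400_000, 16_436);
    assert_eq!(key_ms % 86_400_000, 0);
}
```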
```diff
@@ -407,7 +416,7 @@ mod tests {
             "buckets" : [
                 {
                     "key_as_string" : "2015-01-01T00:00:00Z",
-                    "key" : 1420070400000000000.0,
+                    "key" : 1420070400000.0,
                     "doc_count" : 4,
                     "texts": {
                         "buckets": [
@@ -456,32 +465,32 @@ mod tests {
             "buckets": [
                 {
                     "doc_count": 2,
-                    "key": 1420070400000000000.0,
+                    "key": 1420070400000.0,
                     "key_as_string": "2015-01-01T00:00:00Z"
                 },
                 {
                     "doc_count": 1,
-                    "key": 1420156800000000000.0,
+                    "key": 1420156800000.0,
                     "key_as_string": "2015-01-02T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420243200000000000.0,
+                    "key": 1420243200000.0,
                     "key_as_string": "2015-01-03T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420329600000000000.0,
+                    "key": 1420329600000.0,
                     "key_as_string": "2015-01-04T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420416000000000000.0,
+                    "key": 1420416000000.0,
                     "key_as_string": "2015-01-05T00:00:00Z"
                 },
                 {
                     "doc_count": 1,
-                    "key": 1420502400000000000.0,
+                    "key": 1420502400000.0,
                     "key_as_string": "2015-01-06T00:00:00Z"
                 }
             ]
@@ -499,8 +508,8 @@ mod tests {
                 "field": "date",
                 "fixed_interval": "1d",
                 "extended_bounds": {
-                    "min": 1419984000000000000.0,
-                    "max": 1420588800000000000.0
+                    "min": 1419984000000.0,
+                    "max": 1420588800000.0
                 }
             }
         }
@@ -517,42 +526,42 @@ mod tests {
             "buckets": [
                 {
                     "doc_count": 0,
-                    "key": 1419984000000000000.0,
+                    "key": 1419984000000.0,
                     "key_as_string": "2014-12-31T00:00:00Z"
                 },
                 {
                     "doc_count": 2,
-                    "key": 1420070400000000000.0,
+                    "key": 1420070400000.0,
                     "key_as_string": "2015-01-01T00:00:00Z"
                 },
                 {
                     "doc_count": 1,
-                    "key": 1420156800000000000.0,
+                    "key": 1420156800000.0,
                     "key_as_string": "2015-01-02T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420243200000000000.0,
+                    "key": 1420243200000.0,
                     "key_as_string": "2015-01-03T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420329600000000000.0,
+                    "key": 1420329600000.0,
                     "key_as_string": "2015-01-04T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1420416000000000000.0,
+                    "key": 1420416000000.0,
                     "key_as_string": "2015-01-05T00:00:00Z"
                 },
                 {
                     "doc_count": 1,
-                    "key": 1420502400000000000.0,
+                    "key": 1420502400000.0,
                     "key_as_string": "2015-01-06T00:00:00Z"
                 },
                 {
                     "doc_count": 0,
-                    "key": 1.4205888e18,
+                    "key": 1420588800000.0,
                     "key_as_string": "2015-01-07T00:00:00Z"
                 }
             ]
@@ -569,8 +578,8 @@ mod tests {
                 "field": "date",
                 "fixed_interval": "1d",
                 "hard_bounds": {
-                    "min": 1420156800000000000.0,
-                    "max": 1420243200000000000.0
+                    "min": 1420156800000.0,
+                    "max": 1420243200000.0
                 }
             }
         }
@@ -587,7 +596,7 @@ mod tests {
             "buckets": [
                 {
                     "doc_count": 1,
-                    "key": 1420156800000000000.0,
+                    "key": 1420156800000.0,
                     "key_as_string": "2015-01-02T00:00:00Z"
                 }
             ]
```
