Skip to content

Commit 80d764e

Browse files
committed
2 parents 4350878 + 1465a73 commit 80d764e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+822
-500
lines changed

ARCHITECTURE.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ The token positions of all of the terms are then stored in a separate file with
254254
The [TermInfo](src/postings/term_info.rs) gives an offset (expressed in position this time) in this file. As we iterate through the docset,
255255
we advance the position reader by the number of term frequencies of the current document.
256256

257-
## [fieldnorms/](src/fieldnorms): Here is my doc, how many tokens in this field?
257+
## [fieldnorm/](src/fieldnorm): Here is my doc, how many tokens in this field?
258258

259259
The [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) formula also requires to know the number of tokens stored in a specific field for a given document. We store this information on one byte per document in the fieldnorm.
260260
The fieldnorm is therefore compressed. Values up to 40 are encoded unchanged.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ once_cell = "1.10.0"
2222
regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
2323
aho-corasick = "1.0"
2424
tantivy-fst = "0.4.0"
25-
memmap2 = { version = "0.5.3", optional = true }
25+
memmap2 = { version = "0.6.0", optional = true }
2626
lz4_flex = { version = "0.10", default-features = false, features = ["checked-decode"], optional = true }
2727
brotli = { version = "3.3.4", optional = true }
2828
zstd = { version = "0.12", optional = true, default-features = false }

columnar/src/columnar/column_type.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ impl fmt::Display for ColumnType {
3434
ColumnType::IpAddr => "ip",
3535
ColumnType::DateTime => "datetime",
3636
};
37-
write!(f, "{}", short_str)
37+
write!(f, "{short_str}")
3838
}
3939
}
4040

columnar/src/dynamic_column.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ impl fmt::Debug for DynamicColumn {
2626
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
2727
write!(f, "[{} {} |", self.get_cardinality(), self.column_type())?;
2828
match self {
29-
DynamicColumn::Bool(col) => write!(f, " {:?}", col)?,
30-
DynamicColumn::I64(col) => write!(f, " {:?}", col)?,
31-
DynamicColumn::U64(col) => write!(f, " {:?}", col)?,
32-
DynamicColumn::F64(col) => write!(f, "{:?}", col)?,
33-
DynamicColumn::IpAddr(col) => write!(f, "{:?}", col)?,
34-
DynamicColumn::DateTime(col) => write!(f, "{:?}", col)?,
35-
DynamicColumn::Bytes(col) => write!(f, "{:?}", col)?,
36-
DynamicColumn::Str(col) => write!(f, "{:?}", col)?,
29+
DynamicColumn::Bool(col) => write!(f, " {col:?}")?,
30+
DynamicColumn::I64(col) => write!(f, " {col:?}")?,
31+
DynamicColumn::U64(col) => write!(f, " {col:?}")?,
32+
DynamicColumn::F64(col) => write!(f, "{col:?}")?,
33+
DynamicColumn::IpAddr(col) => write!(f, "{col:?}")?,
34+
DynamicColumn::DateTime(col) => write!(f, "{col:?}")?,
35+
DynamicColumn::Bytes(col) => write!(f, "{col:?}")?,
36+
DynamicColumn::Str(col) => write!(f, "{col:?}")?,
3737
}
3838
write!(f, "]")
3939
}

common/src/byte_count.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ impl ByteCount {
3737
for (suffix, threshold) in SUFFIX_AND_THRESHOLD.iter().rev() {
3838
if self.get_bytes() >= *threshold {
3939
let unit_num = self.get_bytes() as f64 / *threshold as f64;
40-
return format!("{:.2} {}", unit_num, suffix);
40+
return format!("{unit_num:.2} {suffix}");
4141
}
4242
}
4343
format!("{:.2} B", self.get_bytes())

common/src/vint.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ mod tests {
261261
let mut buffer2 = [0u8; 8];
262262
let len_vint = VInt(val as u64).serialize_into(&mut buffer);
263263
let res2 = serialize_vint_u32(val, &mut buffer2);
264-
assert_eq!(&buffer[..len_vint], res2, "array wrong for {}", val);
264+
assert_eq!(&buffer[..len_vint], res2, "array wrong for {val}");
265265
}
266266

267267
#[test]

examples/index_from_multiple_threads.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ fn main() -> tantivy::Result<()> {
9696
let mut index_writer_wlock = index_writer.write().unwrap();
9797
index_writer_wlock.commit()?
9898
};
99-
println!("committed with opstamp {}", opstamp);
99+
println!("committed with opstamp {opstamp}");
100100
thread::sleep(Duration::from_millis(500));
101101
}
102102

examples/iterating_docs_and_positions.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ fn main() -> tantivy::Result<()> {
8484
// Doc 0: TermFreq 2: [0, 4]
8585
// Doc 2: TermFreq 1: [0]
8686
// ```
87-
println!("Doc {}: TermFreq {}: {:?}", doc_id, term_freq, positions);
87+
println!("Doc {doc_id}: TermFreq {term_freq}: {positions:?}");
8888
doc_id = segment_postings.advance();
8989
}
9090
}
@@ -125,7 +125,7 @@ fn main() -> tantivy::Result<()> {
125125
// Once again these docs MAY contains deleted documents as well.
126126
let docs = block_segment_postings.docs();
127127
// Prints `Docs [0, 2].`
128-
println!("Docs {:?}", docs);
128+
println!("Docs {docs:?}");
129129
block_segment_postings.advance();
130130
}
131131
}

examples/snippet.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ fn main() -> tantivy::Result<()> {
5656
for (score, doc_address) in top_docs {
5757
let doc = searcher.doc(doc_address)?;
5858
let snippet = snippet_generator.snippet_from_doc(&doc);
59-
println!("Document score {}:", score);
59+
println!("Document score {score}:");
6060
println!(
6161
"title: {}",
6262
doc.get_first(title).unwrap().as_text().unwrap()

examples/stop_words.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ fn main() -> tantivy::Result<()> {
106106

107107
for (score, doc_address) in top_docs {
108108
let retrieved_doc = searcher.doc(doc_address)?;
109-
println!("\n==\nDocument score {}:", score);
109+
println!("\n==\nDocument score {score}:");
110110
println!("{}", schema.to_json(&retrieved_doc));
111111
}
112112

ownedbytes/src/lib.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl fmt::Debug for OwnedBytes {
160160
} else {
161161
self.as_slice()
162162
};
163-
write!(f, "OwnedBytes({:?}, len={})", bytes_truncated, self.len())
163+
write!(f, "OwnedBytes({bytes_truncated:?}, len={})", self.len())
164164
}
165165
}
166166

@@ -259,12 +259,12 @@ mod tests {
259259
fn test_owned_bytes_debug() {
260260
let short_bytes = OwnedBytes::new(b"abcd".as_ref());
261261
assert_eq!(
262-
format!("{:?}", short_bytes),
262+
format!("{short_bytes:?}"),
263263
"OwnedBytes([97, 98, 99, 100], len=4)"
264264
);
265265
let long_bytes = OwnedBytes::new(b"abcdefghijklmnopq".as_ref());
266266
assert_eq!(
267-
format!("{:?}", long_bytes),
267+
format!("{long_bytes:?}"),
268268
"OwnedBytes([97, 98, 99, 100, 101, 102, 103, 104, 105, 106], len=17)"
269269
);
270270
}

query-grammar/src/query_grammar.rs

+8-10
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
5656
!c.is_whitespace() && ![':', '^', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
5757
})),
5858
)
59-
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
59+
.map(|(s1, s2): (char, String)| format!("{s1}{s2}"))
6060
.and_then(|s: String| match s.as_str() {
6161
"OR" | "AND " | "NOT" => Err(StringStreamError::UnexpectedParse),
6262
_ => Ok(s),
@@ -74,7 +74,7 @@ fn relaxed_word<'a>() -> impl Parser<&'a str, Output = String> {
7474
!c.is_whitespace() && !['{', '}', '"', '[', ']', '(', ')'].contains(&c)
7575
})),
7676
)
77-
.map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
77+
.map(|(s1, s2): (char, String)| format!("{s1}{s2}"))
7878
}
7979

8080
/// Parses a date time according to rfc3339
@@ -178,9 +178,9 @@ fn negative_number<'a>() -> impl Parser<&'a str, Output = String> {
178178
)
179179
.map(|(s1, s2, s3): (char, String, Option<(char, String)>)| {
180180
if let Some(('.', s3)) = s3 {
181-
format!("{}{}.{}", s1, s2, s3)
181+
format!("{s1}{s2}.{s3}")
182182
} else {
183-
format!("{}{}", s1, s2)
183+
format!("{s1}{s2}")
184184
}
185185
})
186186
}
@@ -419,9 +419,7 @@ mod test {
419419
fn assert_nearly_equals(expected: f64, val: f64) {
420420
assert!(
421421
nearly_equals(val, expected),
422-
"Got {}, expected {}.",
423-
val,
424-
expected
422+
"Got {val}, expected {expected}."
425423
);
426424
}
427425

@@ -468,7 +466,7 @@ mod test {
468466

469467
fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
470468
let query = parse_to_ast().parse(query).unwrap().0;
471-
let query_str = format!("{:?}", query);
469+
let query_str = format!("{query:?}");
472470
assert_eq!(query_str, expected);
473471
}
474472

@@ -554,7 +552,7 @@ mod test {
554552
fn test_occur_leaf() {
555553
let ((occur, ast), _) = super::occur_leaf().parse("+abc").unwrap();
556554
assert_eq!(occur, Some(Occur::Must));
557-
assert_eq!(format!("{:?}", ast), "\"abc\"");
555+
assert_eq!(format!("{ast:?}"), "\"abc\"");
558556
}
559557

560558
#[test]
@@ -613,7 +611,7 @@ mod test {
613611
let escaped_special_chars_re = Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap();
614612
for special_char in SPECIAL_CHARS.iter() {
615613
assert_eq!(
616-
escaped_special_chars_re.replace_all(&format!("\\{}", special_char), "$1"),
614+
escaped_special_chars_re.replace_all(&format!("\\{special_char}"), "$1"),
617615
special_char.to_string()
618616
);
619617
}

query-grammar/src/user_input_ast.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ impl Debug for UserInputLeaf {
2828
ref upper,
2929
} => {
3030
if let Some(ref field) = field {
31-
write!(formatter, "\"{}\":", field)?;
31+
write!(formatter, "\"{field}\":")?;
3232
}
3333
lower.display_lower(formatter)?;
3434
write!(formatter, " TO ")?;
@@ -37,14 +37,14 @@ impl Debug for UserInputLeaf {
3737
}
3838
UserInputLeaf::Set { field, elements } => {
3939
if let Some(ref field) = field {
40-
write!(formatter, "\"{}\": ", field)?;
40+
write!(formatter, "\"{field}\": ")?;
4141
}
4242
write!(formatter, "IN [")?;
4343
for (i, element) in elements.iter().enumerate() {
4444
if i != 0 {
4545
write!(formatter, " ")?;
4646
}
47-
write!(formatter, "\"{}\"", element)?;
47+
write!(formatter, "\"{element}\"")?;
4848
}
4949
write!(formatter, "]")
5050
}
@@ -63,7 +63,7 @@ pub struct UserInputLiteral {
6363
impl fmt::Debug for UserInputLiteral {
6464
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
6565
if let Some(ref field) = self.field_name {
66-
write!(formatter, "\"{}\":", field)?;
66+
write!(formatter, "\"{field}\":")?;
6767
}
6868
write!(formatter, "\"{}\"", self.phrase)?;
6969
if self.slop > 0 {
@@ -83,16 +83,16 @@ pub enum UserInputBound {
8383
impl UserInputBound {
8484
fn display_lower(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
8585
match *self {
86-
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
87-
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
86+
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{word}\""),
87+
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{word}\""),
8888
UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
8989
}
9090
}
9191

9292
fn display_upper(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> {
9393
match *self {
94-
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
95-
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
94+
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{word}\"]"),
95+
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{word}\"}}"),
9696
UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
9797
}
9898
}
@@ -163,9 +163,9 @@ fn print_occur_ast(
163163
formatter: &mut fmt::Formatter,
164164
) -> fmt::Result {
165165
if let Some(occur) = occur_opt {
166-
write!(formatter, "{}{:?}", occur, ast)?;
166+
write!(formatter, "{occur}{ast:?}")?;
167167
} else {
168-
write!(formatter, "*{:?}", ast)?;
168+
write!(formatter, "*{ast:?}")?;
169169
}
170170
Ok(())
171171
}
@@ -187,8 +187,8 @@ impl fmt::Debug for UserInputAst {
187187
}
188188
Ok(())
189189
}
190-
UserInputAst::Leaf(ref subquery) => write!(formatter, "{:?}", subquery),
191-
UserInputAst::Boost(ref leaf, boost) => write!(formatter, "({:?})^{}", leaf, boost),
190+
UserInputAst::Leaf(ref subquery) => write!(formatter, "{subquery:?}"),
191+
UserInputAst::Boost(ref leaf, boost) => write!(formatter, "({leaf:?})^{boost}"),
192192
}
193193
}
194194
}

src/aggregation/agg_limits.rs

+58-19
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
use std::collections::HashMap;
2-
use std::sync::atomic::AtomicU64;
2+
use std::sync::atomic::{AtomicU64, Ordering};
33
use std::sync::Arc;
44

55
use common::ByteCount;
66

77
use super::collector::DEFAULT_MEMORY_LIMIT;
88
use super::{AggregationError, DEFAULT_BUCKET_LIMIT};
9-
use crate::TantivyError;
109

1110
/// An estimate for memory consumption. Non recursive
1211
pub trait MemoryConsumption {
@@ -68,28 +67,68 @@ impl AggregationLimits {
6867
bucket_limit: bucket_limit.unwrap_or(DEFAULT_BUCKET_LIMIT),
6968
}
7069
}
71-
pub(crate) fn validate_memory_consumption(&self) -> crate::Result<()> {
72-
if self.get_memory_consumed() > self.memory_limit {
73-
return Err(TantivyError::AggregationError(
74-
AggregationError::MemoryExceeded {
75-
limit: self.memory_limit,
76-
current: self.get_memory_consumed(),
77-
},
78-
));
70+
71+
/// Create a new ResourceLimitGuard, that will release the memory when dropped.
72+
pub fn new_guard(&self) -> ResourceLimitGuard {
73+
ResourceLimitGuard {
74+
/// The counter which is shared between the aggregations for one request.
75+
memory_consumption: Arc::clone(&self.memory_consumption),
76+
/// The memory_limit in bytes
77+
memory_limit: self.memory_limit,
78+
allocated_with_the_guard: 0,
7979
}
80-
Ok(())
8180
}
82-
pub(crate) fn add_memory_consumed(&self, num_bytes: u64) {
83-
self.memory_consumption
84-
.fetch_add(num_bytes, std::sync::atomic::Ordering::Relaxed);
85-
}
86-
/// Returns the estimated memory consumed by the aggregations
87-
pub fn get_memory_consumed(&self) -> ByteCount {
81+
82+
pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
8883
self.memory_consumption
89-
.load(std::sync::atomic::Ordering::Relaxed)
90-
.into()
84+
.fetch_add(num_bytes, Ordering::Relaxed);
85+
validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
86+
Ok(())
9187
}
88+
9289
pub(crate) fn get_bucket_limit(&self) -> u32 {
9390
self.bucket_limit
9491
}
9592
}
93+
94+
fn validate_memory_consumption(
95+
memory_consumption: &AtomicU64,
96+
memory_limit: ByteCount,
97+
) -> Result<(), AggregationError> {
98+
// Load the estimated memory consumed by the aggregations
99+
let memory_consumed: ByteCount = memory_consumption.load(Ordering::Relaxed).into();
100+
if memory_consumed > memory_limit {
101+
return Err(AggregationError::MemoryExceeded {
102+
limit: memory_limit,
103+
current: memory_consumed,
104+
});
105+
}
106+
Ok(())
107+
}
108+
109+
pub struct ResourceLimitGuard {
110+
/// The counter which is shared between the aggregations for one request.
111+
memory_consumption: Arc<AtomicU64>,
112+
/// The memory_limit in bytes
113+
memory_limit: ByteCount,
114+
/// Allocated memory with this guard.
115+
allocated_with_the_guard: u64,
116+
}
117+
118+
impl ResourceLimitGuard {
119+
pub(crate) fn add_memory_consumed(&self, num_bytes: u64) -> crate::Result<()> {
120+
self.memory_consumption
121+
.fetch_add(num_bytes, Ordering::Relaxed);
122+
validate_memory_consumption(&self.memory_consumption, self.memory_limit)?;
123+
Ok(())
124+
}
125+
}
126+
127+
impl Drop for ResourceLimitGuard {
128+
/// Removes the memory consumed tracked by this _instance_ of AggregationLimits.
129+
/// This is used to clear the segment specific memory consumption all at once.
130+
fn drop(&mut self) {
131+
self.memory_consumption
132+
.fetch_sub(self.allocated_with_the_guard, Ordering::Relaxed);
133+
}
134+
}

0 commit comments

Comments
 (0)