Skip to content

Commit 0081fcc

Browse files
bors[bot]brunocodutrataiki-e
authored
Merge #789 #797
789: Add fn force_push to ArrayQueue r=taiki-e a=brunocodutra This is an attempt to implement a straightforward MPMC ring-buffer and close #680. This proposal adds a new method `push_or_swap` to `ArrayQueue`, that atomically swaps the oldest element when the queue is full, instead of returning `Err` back to the caller like `push` does. As such, `push_or_swap` never fails to insert the element into the queue. I couldn't find any benchmarks I could run, (am I missing anything obvious?), however I did run benchmarks from [ring-channel](https://github.com/brunocodutra/ring-channel) where I compared this implementation against an emulation of a ring-buffer, that keeps popping elements until pushing succeeds, i.e. something like the following: ``` while let Err(v) = q.push(value) { q.pop(); value = v; } ``` I got the results below on my machine, which show that `push_or_swap` fares much better when capacity is low and the probability that pushing fails is high (the baseline was set to the `push`-based implementation). * **Note 1:** the relevant metric in the benchmarks below is the throughput, which is scaled by the "channel efficiency", defined as `total_number_of_messages_received / total_number_of_messages_sent`. * **Note 2:** benchmark names are suffixed by `SB/PxR/C`, where S is the size of the element in bytes, P is the number of threads producing, R the number of threads consuming, and C is the capacity of the ring-buffer: * **Note 3:** the source code for the benchmarks is [available here](https://github.com/brunocodutra/ring-channel/blob/master/benches/throughput.rs#L18-L48). ``` Benchmarking mpmc/Block/32B/8x8/1: Warming up for 3.0000 s Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 7.2s, enable flat sampling, or reduce sample count to 50. 
mpmc/Block/32B/8x8/1 time: [1.3466 ms 1.3982 ms 1.4507 ms] thrpt: [1.4117 Melem/s 1.4647 Melem/s 1.5209 Melem/s] change: time: [-33.037% -28.797% -24.494%] (p = 0.00 < 0.05) thrpt: [+32.440% +40.443% +49.337%] Performance has improved. Found 2 outliers among 100 measurements (2.00%) 1 (1.00%) low mild 1 (1.00%) high mild mpmc/Block/32B/8x8/16 time: [367.57 us 374.55 us 382.12 us] thrpt: [5.3596 Melem/s 5.4679 Melem/s 5.5717 Melem/s] change: time: [-2.1237% +0.3288% +2.6459%] (p = 0.79 > 0.05) thrpt: [-2.5777% -0.3277% +2.1698%] No change in performance detected. Found 3 outliers among 100 measurements (3.00%) 3 (3.00%) high mild Benchmarking mpsc/Block/32B/15x1/1: Warming up for 3.0000 s Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 9.9s, enable flat sampling, or reduce sample count to 50. mpsc/Block/32B/15x1/1 time: [3.5292 ms 3.7286 ms 3.9535 ms] thrpt: [971.28 Kelem/s 1.0299 Melem/s 1.0881 Melem/s] change: time: [-51.773% -43.940% -34.318%] (p = 0.00 < 0.05) thrpt: [+52.248% +78.380% +107.35%] Performance has improved. Found 5 outliers among 100 measurements (5.00%) 4 (4.00%) high mild 1 (1.00%) high severe mpsc/Block/32B/15x1/16 time: [853.29 us 873.07 us 895.27 us] thrpt: [4.2892 Melem/s 4.3983 Melem/s 4.5003 Melem/s] change: time: [-8.3188% +0.1727% +9.3995%] (p = 0.97 > 0.05) thrpt: [-8.5919% -0.1724% +9.0736%] No change in performance detected. Found 5 outliers among 100 measurements (5.00%) 3 (3.00%) high mild 2 (2.00%) high severe spmc/Block/32B/1x15/1 time: [163.94 us 169.05 us 173.89 us] thrpt: [1.4722 Melem/s 1.5144 Melem/s 1.5616 Melem/s] change: time: [-6.0575% -1.4457% +3.5710%] (p = 0.55 > 0.05) thrpt: [-3.4479% +1.4669% +6.4481%] No change in performance detected. 
Found 7 outliers among 100 measurements (7.00%) 6 (6.00%) low mild 1 (1.00%) high mild spmc/Block/32B/1x15/16 time: [49.955 us 53.012 us 56.021 us] thrpt: [4.5697 Melem/s 4.8291 Melem/s 5.1246 Melem/s] change: time: [-9.8603% -3.6168% +3.6703%] (p = 0.31 > 0.05) thrpt: [-3.5403% +3.7526% +10.939%] No change in performance detected. spsc/Block/32B/1x1/1 time: [92.707 us 98.294 us 103.02 us] thrpt: [2.4851 Melem/s 2.6044 Melem/s 2.7614 Melem/s] change: time: [-13.073% -5.2960% +2.5130%] (p = 0.21 > 0.05) thrpt: [-2.4514% +5.5922% +15.039%] No change in performance detected. spsc/Block/32B/1x1/2 time: [79.525 us 87.271 us 94.110 us] thrpt: [2.7202 Melem/s 2.9334 Melem/s 3.2191 Melem/s] change: time: [-18.141% -8.7754% +0.3419%] (p = 0.07 > 0.05) thrpt: [-0.3407% +9.6196% +22.162%] No change in performance detected. ``` 797: Update to stabilized const_fn_trait_bound r=taiki-e a=taiki-e const_fn_trait_bound has been stabilized on Rust 1.61. Co-authored-by: Bruno Dutra <[email protected]> Co-authored-by: Taiki Endo <[email protected]>
3 parents b11f1a8 + bd75c3c + 7b2d65f commit 0081fcc

File tree

6 files changed

+219
-42
lines changed

6 files changed

+219
-42
lines changed

crossbeam-epoch/Cargo.toml

+7-2
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,26 @@ std = ["alloc", "crossbeam-utils/std", "lazy_static"]
2727
# NOTE: Disabling both `std` *and* `alloc` features is not supported yet.
2828
alloc = []
2929

30+
# These features are no longer used.
31+
# TODO: remove in the next major version.
3032
# Enable the use of unstable functionality.
3133
# This is disabled by default and requires recent nightly compiler.
3234
#
3335
# NOTE: This feature is outside of the normal semver guarantees and minor or
3436
# patch versions of crossbeam may make breaking changes to them at any time.
35-
nightly = ["crossbeam-utils/nightly", "const_fn"]
37+
nightly = ["crossbeam-utils/nightly"]
3638

3739
# Enable the use of loom for concurrency testing.
3840
#
3941
# NOTE: This feature is outside of the normal semver guarantees and minor or
4042
# patch versions of crossbeam may make breaking changes to them at any time.
4143
loom = ["loom-crate", "crossbeam-utils/loom"]
4244

45+
[build-dependencies]
46+
autocfg = "1"
47+
4348
[dependencies]
4449
cfg-if = "1"
45-
const_fn = { version = "0.4.4", optional = true }
4650
memoffset = "0.6"
4751

4852
# Enable the use of loom for concurrency testing.
@@ -67,3 +71,4 @@ default-features = false
6771

6872
[dev-dependencies]
6973
rand = "0.8"
74+
rustversion = "1"

crossbeam-epoch/build.rs

+17
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,18 @@ fn main() {
2929
}
3030
};
3131

32+
let cfg = match autocfg::AutoCfg::new() {
33+
Ok(cfg) => cfg,
34+
Err(e) => {
35+
println!(
36+
"cargo:warning={}: unable to determine rustc version: {}",
37+
env!("CARGO_PKG_NAME"),
38+
e
39+
);
40+
return;
41+
}
42+
};
43+
3244
// Note that this is `no_*`, not `has_*`. This allows treating
3345
// `cfg(target_has_atomic = "ptr")` as true when the build script doesn't
3446
// run. This is needed for compatibility with non-cargo build systems that
@@ -37,5 +49,10 @@ fn main() {
3749
println!("cargo:rustc-cfg=crossbeam_no_atomic_cas");
3850
}
3951

52+
if cfg.probe_rustc_version(1, 61) {
53+
// TODO: invert cfg once Rust 1.61 becomes stable.
54+
println!("cargo:rustc-cfg=crossbeam_const_fn_trait_bound");
55+
}
56+
4057
println!("cargo:rerun-if-changed=no_atomic.rs");
4158
}

crossbeam-epoch/src/atomic.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,16 @@ impl<T: ?Sized + Pointable> Atomic<T> {
342342
///
343343
/// let a = Atomic::<i32>::null();
344344
/// ```
345-
///
346-
#[cfg_attr(all(feature = "nightly", not(crossbeam_loom)), const_fn::const_fn)]
345+
#[cfg(all(crossbeam_const_fn_trait_bound, not(crossbeam_loom)))]
346+
pub const fn null() -> Atomic<T> {
347+
Self {
348+
data: AtomicUsize::new(0),
349+
_marker: PhantomData,
350+
}
351+
}
352+
353+
/// Returns a new null atomic pointer.
354+
#[cfg(not(all(crossbeam_const_fn_trait_bound, not(crossbeam_loom))))]
347355
pub fn null() -> Atomic<T> {
348356
Self {
349357
data: AtomicUsize::new(0),
@@ -1594,7 +1602,7 @@ mod tests {
15941602
Shared::<i64>::null().with_tag(7);
15951603
}
15961604

1597-
#[cfg(feature = "nightly")]
1605+
#[rustversion::since(1.61)]
15981606
#[test]
15991607
fn const_atomic_null() {
16001608
use super::Atomic;

crossbeam-epoch/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
unreachable_pub
6363
)]
6464
#![cfg_attr(not(feature = "std"), no_std)]
65-
#![cfg_attr(feature = "nightly", feature(const_fn_trait_bound))]
6665

6766
#[cfg(crossbeam_loom)]
6867
extern crate loom_crate as loom;

crossbeam-queue/src/array_queue.rs

+92-35
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ struct Slot<T> {
2727
///
2828
/// This queue allocates a fixed-capacity buffer on construction, which is used to store pushed
2929
/// elements. The queue cannot hold more elements than the buffer allows. Attempting to push an
30-
/// element into a full queue will fail. Having a buffer allocated upfront makes this queue a bit
31-
/// faster than [`SegQueue`].
30+
/// element into a full queue will fail. Alternatively, [`force_push`] makes it possible for
31+
/// this queue to be used as a ring-buffer. Having a buffer allocated upfront makes this queue
32+
/// a bit faster than [`SegQueue`].
3233
///
34+
/// [`force_push`]: ArrayQueue::force_push
3335
/// [`SegQueue`]: super::SegQueue
3436
///
3537
/// # Examples
@@ -120,21 +122,10 @@ impl<T> ArrayQueue<T> {
120122
}
121123
}
122124

123-
/// Attempts to push an element into the queue.
124-
///
125-
/// If the queue is full, the element is returned back as an error.
126-
///
127-
/// # Examples
128-
///
129-
/// ```
130-
/// use crossbeam_queue::ArrayQueue;
131-
///
132-
/// let q = ArrayQueue::new(1);
133-
///
134-
/// assert_eq!(q.push(10), Ok(()));
135-
/// assert_eq!(q.push(20), Err(20));
136-
/// ```
137-
pub fn push(&self, value: T) -> Result<(), T> {
125+
fn push_or_else<F>(&self, mut value: T, f: F) -> Result<(), T>
126+
where
127+
F: Fn(T, usize, usize, &Slot<T>) -> Result<T, T>,
128+
{
138129
let backoff = Backoff::new();
139130
let mut tail = self.tail.load(Ordering::Relaxed);
140131

@@ -143,23 +134,23 @@ impl<T> ArrayQueue<T> {
143134
let index = tail & (self.one_lap - 1);
144135
let lap = tail & !(self.one_lap - 1);
145136

137+
let new_tail = if index + 1 < self.cap {
138+
// Same lap, incremented index.
139+
// Set to `{ lap: lap, index: index + 1 }`.
140+
tail + 1
141+
} else {
142+
// One lap forward, index wraps around to zero.
143+
// Set to `{ lap: lap.wrapping_add(1), index: 0 }`.
144+
lap.wrapping_add(self.one_lap)
145+
};
146+
146147
// Inspect the corresponding slot.
147148
debug_assert!(index < self.buffer.len());
148149
let slot = unsafe { self.buffer.get_unchecked(index) };
149150
let stamp = slot.stamp.load(Ordering::Acquire);
150151

151152
// If the tail and the stamp match, we may attempt to push.
152153
if tail == stamp {
153-
let new_tail = if index + 1 < self.cap {
154-
// Same lap, incremented index.
155-
// Set to `{ lap: lap, index: index + 1 }`.
156-
tail + 1
157-
} else {
158-
// One lap forward, index wraps around to zero.
159-
// Set to `{ lap: lap.wrapping_add(1), index: 0 }`.
160-
lap.wrapping_add(self.one_lap)
161-
};
162-
163154
// Try moving the tail.
164155
match self.tail.compare_exchange_weak(
165156
tail,
@@ -182,14 +173,7 @@ impl<T> ArrayQueue<T> {
182173
}
183174
} else if stamp.wrapping_add(self.one_lap) == tail + 1 {
184175
atomic::fence(Ordering::SeqCst);
185-
let head = self.head.load(Ordering::Relaxed);
186-
187-
// If the head lags one lap behind the tail as well...
188-
if head.wrapping_add(self.one_lap) == tail {
189-
// ...then the queue is full.
190-
return Err(value);
191-
}
192-
176+
value = f(value, tail, new_tail, slot)?;
193177
backoff.spin();
194178
tail = self.tail.load(Ordering::Relaxed);
195179
} else {
@@ -200,6 +184,79 @@ impl<T> ArrayQueue<T> {
200184
}
201185
}
202186

187+
/// Attempts to push an element into the queue.
188+
///
189+
/// If the queue is full, the element is returned back as an error.
190+
///
191+
/// # Examples
192+
///
193+
/// ```
194+
/// use crossbeam_queue::ArrayQueue;
195+
///
196+
/// let q = ArrayQueue::new(1);
197+
///
198+
/// assert_eq!(q.push(10), Ok(()));
199+
/// assert_eq!(q.push(20), Err(20));
200+
/// ```
201+
pub fn push(&self, value: T) -> Result<(), T> {
202+
self.push_or_else(value, |v, tail, _, _| {
203+
let head = self.head.load(Ordering::Relaxed);
204+
205+
// If the head lags one lap behind the tail as well...
206+
if head.wrapping_add(self.one_lap) == tail {
207+
// ...then the queue is full.
208+
Err(v)
209+
} else {
210+
Ok(v)
211+
}
212+
})
213+
}
214+
215+
/// Pushes an element into the queue, replacing the oldest element if necessary.
216+
///
217+
/// If the queue is full, the oldest element is replaced and returned,
218+
/// otherwise `None` is returned.
219+
///
220+
/// # Examples
221+
///
222+
/// ```
223+
/// use crossbeam_queue::ArrayQueue;
224+
///
225+
/// let q = ArrayQueue::new(2);
226+
///
227+
/// assert_eq!(q.force_push(10), None);
228+
/// assert_eq!(q.force_push(20), None);
229+
/// assert_eq!(q.force_push(30), Some(10));
230+
/// assert_eq!(q.pop(), Some(20));
231+
/// ```
232+
pub fn force_push(&self, value: T) -> Option<T> {
233+
self.push_or_else(value, |v, tail, new_tail, slot| {
234+
let head = tail.wrapping_sub(self.one_lap);
235+
let new_head = new_tail.wrapping_sub(self.one_lap);
236+
237+
// Try moving the head.
238+
if self
239+
.head
240+
.compare_exchange_weak(head, new_head, Ordering::SeqCst, Ordering::Relaxed)
241+
.is_ok()
242+
{
243+
// Move the tail.
244+
self.tail.store(new_tail, Ordering::SeqCst);
245+
246+
// Swap the previous value.
247+
let old = unsafe { slot.value.get().replace(MaybeUninit::new(v)).assume_init() };
248+
249+
// Update the stamp.
250+
slot.stamp.store(tail + 1, Ordering::Release);
251+
252+
Err(old)
253+
} else {
254+
Ok(v)
255+
}
256+
})
257+
.err()
258+
}
259+
203260
/// Attempts to pop an element from the queue.
204261
///
205262
/// If the queue is empty, `None` is returned.

crossbeam-queue/tests/array_queue.rs

+92-1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,45 @@ fn spsc() {
144144
.unwrap();
145145
}
146146

147+
#[cfg_attr(miri, ignore)] // Miri is too slow
148+
#[test]
149+
fn spsc_ring_buffer() {
150+
const COUNT: usize = 100_000;
151+
152+
let t = AtomicUsize::new(1);
153+
let q = ArrayQueue::<usize>::new(3);
154+
let v = (0..COUNT).map(|_| AtomicUsize::new(0)).collect::<Vec<_>>();
155+
156+
scope(|scope| {
157+
scope.spawn(|_| loop {
158+
match t.load(Ordering::SeqCst) {
159+
0 if q.is_empty() => break,
160+
161+
_ => {
162+
while let Some(n) = q.pop() {
163+
v[n].fetch_add(1, Ordering::SeqCst);
164+
}
165+
}
166+
}
167+
});
168+
169+
scope.spawn(|_| {
170+
for i in 0..COUNT {
171+
if let Some(n) = q.force_push(i) {
172+
v[n].fetch_add(1, Ordering::SeqCst);
173+
}
174+
}
175+
176+
t.fetch_sub(1, Ordering::SeqCst);
177+
});
178+
})
179+
.unwrap();
180+
181+
for c in v {
182+
assert_eq!(c.load(Ordering::SeqCst), 1);
183+
}
184+
}
185+
147186
#[cfg_attr(miri, ignore)] // Miri is too slow
148187
#[test]
149188
fn mpmc() {
@@ -181,6 +220,50 @@ fn mpmc() {
181220
}
182221
}
183222

223+
#[cfg_attr(miri, ignore)] // Miri is too slow
224+
#[test]
225+
fn mpmc_ring_buffer() {
226+
const COUNT: usize = 25_000;
227+
const THREADS: usize = 4;
228+
229+
let t = AtomicUsize::new(THREADS);
230+
let q = ArrayQueue::<usize>::new(3);
231+
let v = (0..COUNT).map(|_| AtomicUsize::new(0)).collect::<Vec<_>>();
232+
233+
scope(|scope| {
234+
for _ in 0..THREADS {
235+
scope.spawn(|_| loop {
236+
match t.load(Ordering::SeqCst) {
237+
0 if q.is_empty() => break,
238+
239+
_ => {
240+
while let Some(n) = q.pop() {
241+
v[n].fetch_add(1, Ordering::SeqCst);
242+
}
243+
}
244+
}
245+
});
246+
}
247+
248+
for _ in 0..THREADS {
249+
scope.spawn(|_| {
250+
for i in 0..COUNT {
251+
if let Some(n) = q.force_push(i) {
252+
v[n].fetch_add(1, Ordering::SeqCst);
253+
}
254+
}
255+
256+
t.fetch_sub(1, Ordering::SeqCst);
257+
});
258+
}
259+
})
260+
.unwrap();
261+
262+
for c in v {
263+
assert_eq!(c.load(Ordering::SeqCst), THREADS);
264+
}
265+
}
266+
184267
#[cfg_attr(miri, ignore)] // Miri is too slow
185268
#[test]
186269
fn drops() {
@@ -244,13 +327,21 @@ fn linearizable() {
244327
let q = ArrayQueue::new(THREADS);
245328

246329
scope(|scope| {
247-
for _ in 0..THREADS {
330+
for _ in 0..THREADS / 2 {
248331
scope.spawn(|_| {
249332
for _ in 0..COUNT {
250333
while q.push(0).is_err() {}
251334
q.pop().unwrap();
252335
}
253336
});
337+
338+
scope.spawn(|_| {
339+
for _ in 0..COUNT {
340+
if q.force_push(0).is_none() {
341+
q.pop().unwrap();
342+
}
343+
}
344+
});
254345
}
255346
})
256347
.unwrap();

0 commit comments

Comments
 (0)