forked from UoB-HPC/BabelStream
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrayon_stream.rs
77 lines (67 loc) · 1.92 KB
/
rayon_stream.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
use std::iter::Sum;
use rayon::prelude::*;
use rayon::ThreadPool;
use crate::stream::{AllocatorType, ArrayType, RustStream, StreamData};
/// Device handle for the Rayon backend.
///
/// Wraps the thread pool that the Rayon stream kernels hand their parallel
/// work to.
pub struct RayonDevice {
    /// Pool used to execute the parallel kernels.
    pub(crate) pool: ThreadPool,
}
/// Rayon version of the stream kernels; it is intended to be semantically equal to
/// the single-threaded version.
///
/// Every kernel wraps its parallel iterator in `ThreadPool::install` on the pool
/// owned by the [`RayonDevice`], so the work runs on that pool rather than on
/// Rayon's global pool.
///
/// The kernels index the input arrays with the position of the element being
/// written, so an input shorter than the output causes an out-of-bounds panic.
/// NOTE(review): presumably `a`, `b` and `c` always share one length — confirm
/// against the allocation code.
impl<T: ArrayType + Sync + Send + Sum, A: AllocatorType + Sync + Send> RustStream<T>
    for StreamData<T, RayonDevice, A>
{
    /// Fills `a`, `b` and `c` with the three components of `self.init`.
    ///
    /// Runs inside the device pool like all other kernels, so initialisation uses
    /// the same worker threads as the kernels that follow.
    fn init_arrays(&mut self) {
        let init = self.init;
        let a = &mut self.a;
        let b = &mut self.b;
        let c = &mut self.c;
        self.device.pool.install(|| {
            a.par_iter_mut().for_each(|v| *v = init.0);
            b.par_iter_mut().for_each(|v| *v = init.1);
            c.par_iter_mut().for_each(|v| *v = init.2);
        });
    }

    /// Copy kernel: `c[i] = a[i]`.
    fn copy(&mut self) {
        let a = &self.a;
        let c = &mut self.c;
        self.device.pool.install(|| {
            c.par_iter_mut().enumerate().for_each(|(i, out)| *out = a[i]);
        });
    }

    /// Mul kernel: `b[i] = scalar * c[i]`.
    fn mul(&mut self) {
        let scalar = self.scalar;
        let c = &self.c;
        let b = &mut self.b;
        self.device.pool.install(|| {
            b.par_iter_mut()
                .enumerate()
                .for_each(|(i, out)| *out = scalar * c[i]);
        });
    }

    /// Add kernel: `c[i] = a[i] + b[i]`.
    fn add(&mut self) {
        let a = &self.a;
        let b = &self.b;
        let c = &mut self.c;
        self.device.pool.install(|| {
            c.par_iter_mut()
                .enumerate()
                .for_each(|(i, out)| *out = a[i] + b[i]);
        });
    }

    /// Triad kernel: `a[i] = b[i] + scalar * c[i]`.
    fn triad(&mut self) {
        let scalar = self.scalar;
        let a = &mut self.a;
        let b = &self.b;
        let c = &self.c;
        self.device.pool.install(|| {
            a.par_iter_mut()
                .enumerate()
                .for_each(|(i, out)| *out = b[i] + scalar * c[i]);
        });
    }

    /// Nstream kernel: `a[i] += b[i] + scalar * c[i]`.
    fn nstream(&mut self) {
        let scalar = self.scalar;
        let a = &mut self.a;
        let b = &self.b;
        let c = &self.c;
        self.device.pool.install(|| {
            a.par_iter_mut()
                .enumerate()
                .for_each(|(i, out)| *out += b[i] + scalar * c[i]);
        });
    }

    /// Dot kernel: returns the sum of `a[i] * b[i]` for `i` in `0..self.size`.
    ///
    /// Partial sums are seeded with `T::default()` and then combined with `sum`.
    /// NOTE(review): this assumes `T::default()` is the additive identity — confirm
    /// against the `ArrayType` definition.
    fn dot(&mut self) -> T {
        let a = &self.a;
        let b = &self.b;
        // Copy the length out so the parallel closure does not need to borrow `self`.
        let size = self.size;
        self.device.pool.install(|| {
            (0..size)
                .into_par_iter()
                .fold(T::default, |acc, i| acc + a[i] * b[i])
                .sum::<T>()
        })
    }
}