Skip to content

Commit f2aafea

Browse files
committed
Add spawn_datadeps for OMP-like task model
1 parent 7754329 commit f2aafea

File tree

10 files changed

+876
-12
lines changed

10 files changed

+876
-12
lines changed

Diff for: Manifest.toml

+46-12
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,13 @@
22

33
julia_version = "1.8.5"
44
manifest_format = "2.0"
5-
project_hash = "8da7911e4788068aaea8c0ef8589d674bce0fb39"
5+
project_hash = "63ad89f514e49fbb0061c336a95c9098f89440c9"
6+
7+
[[deps.ArnoldiMethod]]
8+
deps = ["LinearAlgebra", "Random", "StaticArrays"]
9+
git-tree-sha1 = "62e51b39331de8911e4a7ff6f5aaf38a5f4cc0ae"
10+
uuid = "ec485272-7323-5ecc-a04f-4719b315124d"
11+
version = "0.2.0"
612

713
[[deps.Artifacts]]
814
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
@@ -12,9 +18,9 @@ uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
1218

1319
[[deps.ChainRulesCore]]
1420
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
15-
git-tree-sha1 = "2118cb2765f8197b08e5958cdd17c165427425ee"
21+
git-tree-sha1 = "0d12ee16b3f62e4e33c3277773730a5b21a74152"
1622
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
17-
version = "1.19.0"
23+
version = "1.20.0"
1824

1925
[[deps.ChangesOfVariables]]
2026
deps = ["InverseFunctions", "LinearAlgebra", "Test"]
@@ -23,26 +29,26 @@ uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
2329
version = "0.1.8"
2430

2531
[[deps.Compat]]
26-
deps = ["Dates", "LinearAlgebra", "UUIDs"]
27-
git-tree-sha1 = "886826d76ea9e72b35fcd000e535588f7b60f21d"
32+
deps = ["Dates", "LinearAlgebra", "TOML", "UUIDs"]
33+
git-tree-sha1 = "75bd5b6fc5089df449b5d35fa501c846c9b6549b"
2834
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
29-
version = "4.10.1"
35+
version = "4.12.0"
3036

3137
[[deps.CompilerSupportLibraries_jll]]
3238
deps = ["Artifacts", "Libdl"]
3339
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
3440
version = "1.0.1+0"
3541

3642
[[deps.DataAPI]]
37-
git-tree-sha1 = "8da84edb865b0b5b0100c0666a9bc9a0b71c553c"
43+
git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe"
3844
uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
39-
version = "1.15.0"
45+
version = "1.16.0"
4046

4147
[[deps.DataStructures]]
4248
deps = ["Compat", "InteractiveUtils", "OrderedCollections"]
43-
git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d"
49+
git-tree-sha1 = "ac67408d9ddf207de5cfa9a97e114352430f01ed"
4450
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
45-
version = "0.18.15"
51+
version = "0.18.16"
4652

4753
[[deps.Dates]]
4854
deps = ["Printf"]
@@ -58,11 +64,22 @@ git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d"
5864
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
5965
version = "0.9.3"
6066

67+
[[deps.Graphs]]
68+
deps = ["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"]
69+
git-tree-sha1 = "899050ace26649433ef1af25bc17a815b3db52b7"
70+
uuid = "86223c79-3864-5bf0-83f7-82e725a168b6"
71+
version = "1.9.0"
72+
6173
[[deps.HashArrayMappedTries]]
6274
git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae"
6375
uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74"
6476
version = "0.2.0"
6577

78+
[[deps.Inflate]]
79+
git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381"
80+
uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9"
81+
version = "0.1.4"
82+
6683
[[deps.InteractiveUtils]]
6784
deps = ["Markdown"]
6885
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
@@ -100,9 +117,9 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
100117

101118
[[deps.MacroTools]]
102119
deps = ["Markdown", "Random"]
103-
git-tree-sha1 = "b211c553c199c111d998ecdaf7623d1b89b69f93"
120+
git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df"
104121
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
105-
version = "0.5.12"
122+
version = "0.5.13"
106123

107124
[[deps.Markdown]]
108125
deps = ["Base64"]
@@ -184,6 +201,12 @@ uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
184201
deps = ["Distributed", "Mmap", "Random", "Serialization"]
185202
uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
186203

204+
[[deps.SimpleTraits]]
205+
deps = ["InteractiveUtils", "MacroTools"]
206+
git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231"
207+
uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
208+
version = "0.9.4"
209+
187210
[[deps.Sockets]]
188211
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
189212

@@ -197,6 +220,17 @@ version = "1.2.1"
197220
deps = ["LinearAlgebra", "Random"]
198221
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
199222

223+
[[deps.StaticArrays]]
224+
deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore", "Statistics"]
225+
git-tree-sha1 = "f68dd04d131d9a8a8eb836173ee8f105c360b0c5"
226+
uuid = "90137ffa-7385-5640-81b9-e52037218182"
227+
version = "1.9.1"
228+
229+
[[deps.StaticArraysCore]]
230+
git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d"
231+
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
232+
version = "1.4.2"
233+
200234
[[deps.Statistics]]
201235
deps = ["LinearAlgebra", "SparseArrays"]
202236
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

Diff for: Project.toml

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "0.18.6"
55
[deps]
66
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
77
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
8+
Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
89
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
910
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
1011
MemPool = "f9f48841-c794-520a-933b-121f7ba6ed94"
@@ -23,6 +24,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
2324

2425
[compat]
2526
DataStructures = "0.18"
27+
Graphs = "1"
2628
MacroTools = "0.5"
2729
MemPool = "0.4.6"
2830
PrecompileTools = "1.2"

Diff for: docs/make.jl

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ makedocs(;
2222
"Scopes" => "scopes.md",
2323
"Processors" => "processors.md",
2424
"Task Queues" => "task-queues.md",
25+
"Datadeps" => "datadeps.md",
2526
"Option Propagation" => "propagation.md",
2627
"Logging and Graphing" => "logging.md",
2728
"Checkpointing" => "checkpointing.md",

Diff for: docs/src/datadeps.md

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Datadeps (Data Dependencies)
2+
3+
For many programs, the restriction that tasks cannot write to their arguments
4+
feels overly limiting and makes certain kinds of programs (such as in-place
5+
linear algebra) hard to express efficiently in Dagger. Thankfully, there is a
6+
solution: `spawn_datadeps`. This function constructs a "datadeps region",
7+
within which tasks are allowed to write to their arguments, with parallelism
8+
controlled by dependencies specified via argument annotations. Let's look at
9+
a simple example to make things concrete:
10+
11+
```julia
12+
A = rand(1000)
13+
B = rand(1000)
14+
C = zeros(1000)
15+
add!(X, Y) = X .+= Y
16+
Dagger.spawn_datadeps() do
17+
Dagger.@spawn add!(InOut(B), In(A))
18+
Dagger.@spawn copyto!(Out(C), In(B))
19+
end
20+
```
21+
22+
In this example, we have two Dagger tasks being launched, one adding `A` into
23+
`B`, and the other copying `B` into `C`. The `add!` task is specifying that
24+
`A` is only being read from (`In` for "input"), and that `B` is being read
25+
from and written to (`Out` for "output", `InOut` for "input and output"). The
26+
`copyto!` task, similarly, is specifying that `B` is being read from, and `C`
27+
is only being written to.
28+
29+
Without `spawn_datadeps` and `In`, `Out`, and `InOut`, the result of these
30+
tasks would be undefined; the two tasks could execute in parallel, or the
31+
`copyto!` could occur before the `add!`, resulting in all kinds of mayhem.
32+
However, `spawn_datadeps` changes things: because we have told Dagger how our
33+
tasks access their arguments, Dagger knows to control the parallelism and
34+
ordering, and ensure that `add!` executes and finishes before `copyto!`
35+
begins, ensuring that `copyto!` "sees" the changes to `B` before executing.
36+
37+
There is another important aspect of `spawn_datadeps` that makes the above
38+
code work: if all of the `Dagger.@spawn` macros are removed, along with the
39+
dependency specifiers, the program would still produce the same results,
40+
without using Dagger. In other words, the parallel (Dagger) version of the
41+
program produces identical results to the serial (non-Dagger) version of the
42+
program. This is similar to using Dagger with purely functional tasks and
43+
without `spawn_datadeps` - removing `Dagger.@spawn` will still result in a
44+
correct (sequential and possibly slower) version of the program. Basically,
45+
`spawn_datadeps` will ensure that Dagger respects the ordering and
46+
dependencies of a program, while still providing parallelism, where possible.
47+
48+
But where is the parallelism? The above example doesn't actually have any
49+
parallelism to exploit! Let's take a look at another example to see the
50+
datadeps model truly shine:
51+
52+
```julia
53+
# Tree reduction of multiple arrays into the first array
54+
function tree_reduce!(op::Base.Callable, As::Vector{<:Array})
55+
Dagger.spawn_datadeps() do
56+
to_reduce = Vector[]
57+
push!(to_reduce, As)
58+
while !isempty(to_reduce)
59+
As = pop!(to_reduce)
60+
n = length(As)
61+
if n == 2
62+
Dagger.@spawn Base.mapreducedim!(identity, op, InOut(As[1]), In(As[2]))
63+
elseif n > 2
64+
push!(to_reduce, [As[1], As[div(n,2)+1]])
65+
push!(to_reduce, As[1:div(n,2)])
66+
push!(to_reduce, As[div(n,2)+1:end])
67+
end
68+
end
69+
end
70+
return As[1]
71+
end
72+
73+
As = [rand(1000) for _ in 1:1000]
74+
Bs = copy.(As)
75+
tree_reduce!(+, As)
76+
@assert isapprox(As[1], reduce((x,y)->x .+ y, Bs))
77+
```
78+
79+
In the above implementation of `tree_reduce!` (which is designed to perform an
80+
elementwise reduction across a vector of arrays), we have a tree reduction
81+
operation where pairs of arrays are reduced, starting with neighboring pairs,
82+
and then reducing pairs of reduction results, etc. until the final result is in
83+
`As[1]`. We can see that the application of Dagger to this algorithm is simple -
84+
only the single `Base.mapreducedim!` call is passed to Dagger - yet due to the
85+
data dependencies and the algorithm's structure, there should be plenty of
86+
parallelism to be exploited across each of the parallel reductions at each
87+
"level" of the reduction tree. Specifically, any two `Dagger.@spawn` calls
88+
which access completely different pairs of arrays can execute in parallel,
89+
while any call which has an `In` on an array will wait for any previous call
90+
which has an `InOut` on that same array.
91+
92+
Additionally, we can notice a powerful feature of this model - if the
93+
`Dagger.@spawn` macro is removed, the code still remains correct, but simply
94+
runs sequentially. This means that the structure of the program doesn't have to
95+
change in order to use Dagger for parallelization, which can make applying
96+
Dagger to existing algorithms quite effortless.

Diff for: src/Dagger.jl

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ include("queue.jl")
3434
include("thunk.jl")
3535
include("submission.jl")
3636
include("chunks.jl")
37+
include("memory-spaces.jl")
3738

3839
# Task scheduling
3940
include("compute.jl")
@@ -42,6 +43,9 @@ include("utils/system_uuid.jl")
4243
include("utils/caching.jl")
4344
include("sch/Sch.jl"); using .Sch
4445

46+
# Data dependency task queue
47+
include("datadeps.jl")
48+
4549
# Array computations
4650
include("array/darray.jl")
4751
include("array/alloc.jl")

0 commit comments

Comments
 (0)