# One multiplicative-update step of non-negative matrix factorization
# (the Lee-Seung updates for the KL divergence). W and H are only rebound
# locally; the function returns the residual X - W*H, not the updated factors.
function nnmf(X, W, H)
    # H update: scale H by W' * (X ./ WH), normalized by the column sums of W
    H = (H .* (W' * (X ./ (W * H)))
         ./ (sum(W; dims=1))')
    # W update: scale W by (X ./ WH) * H', normalized by the row sums of H
    W = (W .* ((X ./ (W * H)) * (H'))
         ./ (sum(H; dims=2)'))
    # residual of the reconstruction
    X - W * H
end
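# A minimal sketch (hypothetical helper, not used by the benchmarks below) of
# a full multiplicative-update loop: unlike `nnmf`, it carries the updated
# factors across iterations and returns them along with the final residual.
function nnmf_iterate(X, W, H; niters=100)
    for _ in 1:niters
        H = H .* (W' * (X ./ (W * H))) ./ (sum(W; dims=1))'
        W = W .* ((X ./ (W * H)) * H') ./ (sum(H; dims=2)')
    end
    return W, H, X - W * H
end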
# Rough flop model for one `nnmf` call: the dominant terms are the dense
# products and elementwise updates, each O(nrow * ncol * nfeatures).
theory_flops(nrow, ncol, nfeatures) = 11 * ncol * nrow * nfeatures + 2 * (ncol + nrow) * nfeatures
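# For example, at scale 1 below (nrow=1002, ncol=2001, nfeatures=12):
#   11 * 2001 * 1002 * 12 + 2 * (2001 + 1002) * 12 ≈ 2.65e8 flops per call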
function nmf_suite(ctx; method, accels)
    suite = BenchmarkGroup()
    # Only a single accelerator is supported at a time; default to the CPU
    accel = !isempty(accels) ? only(accels) : "cpu"
    # Ref cells hold the matrices so that setup can allocate them and
    # teardown can release them between benchmark runs
    X = Ref{Any}()
    W = Ref{Any}()
    H = Ref{Any}()
    # `scales` is provided by the surrounding benchmark harness
    for scale in scales
        ncol = 2001 * scale
        nrow = 1002
        nfeatures = 12
        if method == "raw"
            suite["NNMF scaled by: $scale"] = @benchmarkable begin
                nnmf($X[], $W[], $H[])
            end setup=begin
                _scale = $scale
                @info "Starting non-Dagger NNMF (scale by $_scale)"
                # Allocate the inputs on the requested accelerator
                if $accel == "cuda"
                    $X[] = CUDA.rand(Float32, $nrow, $ncol)
                    $W[] = CUDA.rand(Float32, $nrow, $nfeatures)
                    $H[] = CUDA.rand(Float32, $nfeatures, $ncol)
                elseif $accel == "amdgpu"
                    $X[] = ROCArray(rand(Float32, $nrow, $ncol))
                    $W[] = ROCArray(rand(Float32, $nrow, $nfeatures))
                    $H[] = ROCArray(rand(Float32, $nfeatures, $ncol))
                elseif $accel == "cpu"
                    $X[] = rand(Float32, $nrow, $ncol)
                    $W[] = rand(Float32, $nrow, $nfeatures)
                    $H[] = rand(Float32, $nfeatures, $ncol)
                end
            end teardown=begin
                # Drop the input references and collect garbage on all workers
                $X[] = nothing
                $W[] = nothing
                $H[] = nothing
                @everywhere GC.gc()
            end
        elseif method == "dagger"
            RENDERS[scale] = Dict{Int,Vector}()
            nw = length(workers())
            nsuite = BenchmarkGroup()
            # Benchmark with progressively fewer workers: nw, nw÷2, ..., 1
            while nw > 0
                scope = if accel == "cuda"
                    error("Not implemented")
                    # Unreachable until GPU scheduling is supported:
                    # Dagger.Sch.SchedulerOptions(;proctypes=[
                    #     DaggerGPU.CuArrayDeviceProc
                    # ])
                elseif accel == "amdgpu"
                    error("Not implemented")
                    # Dagger.Sch.SchedulerOptions(;proctypes=[
                    #     DaggerGPU.ROCArrayProc
                    # ])
                elseif accel == "cpu"
                    Dagger.scope(;workers=workers()[1:nw])
                else
                    error("Unknown accelerator $accel")
                end
                # Partition the distributed arrays into bsz-by-bsz blocks
                #bsz = ncol ÷ length(workers())
                bsz = ncol ÷ 64
nsuite["Workers: $nw"] = @benchmarkable begin
Dagger.with_options(;scope=$scope) do
fetch(nnmf($X[], $W[], $H[]))
end
end setup=begin
_nw, _scale = $nw, $scale
@info "Starting $_nw worker Dagger NNMF (scale by $_scale)"
if $accel == "cuda"
# FIXME: Allocate with CUDA.rand if possible
$X[] = Dagger.mapchunks(CuArray, rand(Blocks($bsz, $bsz), Float32, $nrow, $ncol))
$W[] = Dagger.mapchunks(CuArray, rand(Blocks($bsz, $bsz), Float32, $nrow, $nfeatures))
$H[] = Dagger.mapchunks(CuArray, rand(Blocks($bsz, $bsz), Float32, $nfeatures, $ncol))
elseif $accel == "amdgpu"
$X[] = Dagger.mapchunks(ROCArray, rand(Blocks($bsz, $bsz), Float32, $nrow, $ncol))
$W[] = Dagger.mapchunks(ROCArray, rand(Blocks($bsz, $bsz), Float32, $nrow, $nfeatures))
$H[] = Dagger.mapchunks(ROCArray, rand(Blocks($bsz, $bsz), Float32, $nfeatures, $ncol))
elseif $accel == "cpu"
$X[] = rand(Blocks($bsz, $bsz), Float32, $nrow, $ncol)
$W[] = rand(Blocks($bsz, $bsz), Float32, $nrow, $nfeatures)
$H[] = rand(Blocks($bsz, $bsz), Float32, $nfeatures, $ncol)
end
                end teardown=begin
                    # `render`, `live`, and `RENDERS` come from the harness;
                    # save any scheduler renders before releasing the inputs
                    if render != "" && !live
                        Dagger.continue_rendering[] = false
                        # Wait up to 5 seconds for the render results to arrive
                        for i in 1:5
                            isready(Dagger.render_results) && break
                            sleep(1)
                        end
                        if isready(Dagger.render_results)
                            video_paths = take!(Dagger.render_results)
                            try
                                video_data = Dict(key=>read(video_paths[key]) for key in keys(video_paths))
                                push!(get!(()->[], RENDERS[$scale], $nw), video_data)
                            catch err
                                @error "Failed to process render results" exception=(err,catch_backtrace())
                            end
                        else
                            @warn "Failed to fetch render results"
                        end
                    end
                    $X[] = nothing
                    $W[] = nothing
                    $H[] = nothing
                    @everywhere GC.gc()
                end
                nw ÷= 2
            end
            suite["NNMF scaled by: $scale"] = nsuite
        end
    end
    suite
end
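# A minimal sketch of driving this suite (assumptions: the harness globals
# `scales`, `render`, `live`, and `RENDERS` are defined, and `ctx` is whatever
# context object the harness passes through); `tune!` and `run` are standard
# BenchmarkTools calls:
#   using BenchmarkTools
#   suite = nmf_suite(ctx; method="raw", accels=String[])
#   tune!(suite)
#   results = run(suite; verbose=true)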