@@ -9,7 +9,7 @@ const LAUNCH_KWARGS = [:global_size, :local_size, :queue]
9
9
10
10
macro opencl (ex... )
11
11
call = ex[end ]
12
- kwargs = map (ex[1 : end - 1 ]) do kwarg
12
+ kwargs = map (ex[1 : ( end - 1 ) ]) do kwarg
13
13
if kwarg isa Symbol
14
14
:($ kwarg = $ kwarg)
15
15
elseif Meta. isexpr (kwarg, :(= ))
@@ -31,14 +31,14 @@ macro opencl(ex...)
31
31
macro_kwargs, compiler_kwargs, call_kwargs, other_kwargs =
32
32
split_kwargs (kwargs, MACRO_KWARGS, COMPILER_KWARGS, LAUNCH_KWARGS)
33
33
if ! isempty (other_kwargs)
34
- key,val = first (other_kwargs). args
34
+ key, val = first (other_kwargs). args
35
35
throw (ArgumentError (" Unsupported keyword argument '$key '" ))
36
36
end
37
37
38
38
# handle keyword arguments that influence the macro's behavior
39
39
launch = true
40
40
for kwarg in macro_kwargs
41
- key,val = kwarg. args
41
+ key, val = kwarg. args
42
42
if key == :launch
43
43
isa (val, Bool) || throw (ArgumentError (" `launch` keyword argument to @opencl should be a constant value" ))
44
44
launch = val:: Bool
@@ -56,7 +56,8 @@ macro opencl(ex...)
56
56
57
57
# convert the arguments, call the compiler and launch the kernel
58
58
# while keeping the original arguments alive
59
- push! (code. args,
59
+ push! (
60
+ code. args,
60
61
quote
61
62
$ f_var = $ f
62
63
GC. @preserve $ (vars... ) $ f_var begin
@@ -69,13 +70,16 @@ macro opencl(ex...)
69
70
end
70
71
$ kernel
71
72
end
72
- end )
73
+ end
74
+ )
73
75
74
- return esc (quote
75
- let
76
- $ code
76
+ return esc (
77
+ quote
78
+ let
79
+ $ code
80
+ end
77
81
end
78
- end )
82
+ )
79
83
end
80
84
81
85
@@ -101,21 +105,23 @@ end
101
105
# Base.RefValue isn't GPU compatible, so provide a compatible alternative
# TODO: port improvements from CUDA.jl
"""
    CLRefValue{T} <: Ref{T}

Immutable reference wrapper with a single field `x::T`. Indexing with `r[]`
returns the stored value.
"""
struct CLRefValue{T} <: Ref{T}
    x::T
end

# dereferencing returns the wrapped value unchanged
Base.getindex(r::CLRefValue) = r.x
107
111
# Replace a host `Base.RefValue` by a `CLRefValue` wrapping the adapted contents.
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue) = CLRefValue(adapt(to, r[]))
108
112
109
113
# broadcast sometimes passes a ref(type), resulting in a GPU-incompatible DataType box.
# avoid that by using a special kind of ref that knows about the boxed type.
"""
    CLRefType{T} <: Ref{DataType}

Field-less reference that carries the referenced type `T` as a type parameter.
Indexing with `r[]` returns `T`.
"""
struct CLRefType{T} <: Ref{DataType} end

# the "stored" value is recovered from the type parameter
Base.getindex(r::CLRefType{T}) where {T} = T
117
# A `RefValue` holding a type is converted to a `CLRefType` parameterized on that type.
Adapt.adapt_structure(to::KernelAdaptor, r::Base.RefValue{<:Union{DataType, Type}}) =
    CLRefType{r[]}()
115
119
116
120
# case where type is the function being broadcasted
# The type `T` is replaced by a closure that calls `T(x...)`, and the broadcast
# arguments are adapted; the axes are passed through as-is.
Adapt.adapt_structure(
    to::KernelAdaptor,
    bc::Broadcast.Broadcasted{Style, <:Any, Type{T}}
) where {Style, T} =
    Broadcast.Broadcasted{Style}((x...) -> T(x...), adapt(to, bc.args), bc.axes)
120
126
121
127
"""
@@ -131,29 +137,30 @@ register methods for the `OpenCL.KernelAdaptor` type.
131
137
The `pointers` argument is used to collect pointers to indirect SVM buffers, which need to
132
138
be registered with OpenCL before invoking the kernel.
133
139
"""
134
function clconvert(arg, pointers::Vector{Ptr{Cvoid}} = Ptr{Cvoid}[])
    # the adaptor is constructed around `pointers`, so the caller keeps access to
    # the same vector after the conversion
    return adapt(KernelAdaptor(pointers), arg)
end
137
143
138
144
139
-
140
145
# # abstract kernel functionality
141
146
142
# Supertype of kernel objects, parameterized on the function type `F` and the
# argument tuple type `TT`.
abstract type AbstractKernel{F, TT} end
143
148
144
- @inline @generated function (kernel:: AbstractKernel{F,TT} )(args... ;
145
- call_kwargs... ) where {F,TT}
149
+ @inline @generated function (kernel:: AbstractKernel{F, TT} )(
150
+ args... ;
151
+ call_kwargs...
152
+ ) where {F, TT}
146
153
sig = Tuple{F, TT. parameters... } # Base.signature_type with a function type
147
- args = (:(kernel. f), (:( clconvert (args[$ i], svm_pointers) ) for i in 1 : length (args)). .. )
154
+ args = (:(kernel. f), (:(clconvert (args[$ i], svm_pointers)) for i in 1 : length (args)). .. )
148
155
149
156
# filter out ghost arguments that shouldn't be passed
150
157
predicate = dt -> GPUCompiler. isghosttype (dt) || Core. Compiler. isconstType (dt)
151
158
to_pass = map (! predicate, sig. parameters)
152
- call_t = Type[x[1 ] for x in zip (sig. parameters, to_pass) if x[2 ]]
153
- call_args = Union{Expr,Symbol}[x[1 ] for x in zip (args, to_pass) if x[2 ]]
159
+ call_t = Type[x[1 ] for x in zip (sig. parameters, to_pass) if x[2 ]]
160
+ call_args = Union{Expr, Symbol}[x[1 ] for x in zip (args, to_pass) if x[2 ]]
154
161
155
162
# replace non-isbits arguments (they should be unused, or compilation would have failed)
156
- for (i,dt) in enumerate (call_t)
163
+ for (i, dt) in enumerate (call_t)
157
164
if ! isbitstype (dt)
158
165
call_t[i] = Ptr{Any}
159
166
call_args[i] = :C_NULL
@@ -163,17 +170,16 @@ abstract type AbstractKernel{F,TT} end
163
170
# finalize types
164
171
call_tt = Base. to_tuple_type (call_t)
165
172
166
- quote
173
+ return quote
167
174
svm_pointers = Ptr{Cvoid}[]
168
175
$ cl. clcall (kernel. fun, $ call_tt, $ (call_args... ); svm_pointers, call_kwargs... )
169
176
end
170
177
end
171
178
172
179
173
-
174
180
# # host-side kernels
175
181
176
"""
    HostKernel{F, TT} <: AbstractKernel{F, TT}

Kernel object that stores the Julia function `f` together with `fun`, a
`cl.Kernel`.
"""
struct HostKernel{F, TT} <: AbstractKernel{F, TT}
    f::F
    fun::cl.Kernel
end
183
189
184
190
# NOTE(review): presumably taken by `clfunction` while compiling and caching
# kernels; the locking site is not visible here, so confirm before relying on it.
const clfunction_lock = ReentrantLock()
185
191
186
- function clfunction (f:: F , tt:: TT = Tuple{}; kwargs... ) where {F,TT}
192
+ function clfunction (f:: F , tt:: TT = Tuple{}; kwargs... ) where {F, TT}
187
193
ctx = context ()
188
194
dev = device ()
189
195
@@ -200,10 +206,10 @@ function clfunction(f::F, tt::TT=Tuple{}; kwargs...) where {F,TT}
200
206
kernel = get (_kernel_instances, h, nothing )
201
207
if kernel === nothing
202
208
# create the kernel state object
203
- kernel = HostKernel {F,tt} (f, fun)
209
+ kernel = HostKernel {F, tt} (f, fun)
204
210
_kernel_instances[h] = kernel
205
211
end
206
- return kernel:: HostKernel{F,tt}
212
+ return kernel:: HostKernel{F, tt}
207
213
end
208
214
end
209
215
0 commit comments