Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4dd0acc

Browse files
committed Feb 10, 2025 ·
Allow opt-out of implicit bounds-checking
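KernelAbstractions currently creates kernels that look like:

```
if __validindex(ctx)
    # Body
end
```

This is problematic due to the convergence requirement on `@synchronize`: work-items that fail the `__validindex` check skip the entire body, so they never reach a `@synchronize` placed inside it.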
1 parent f038d8c commit 4dd0acc

File tree

3 files changed

+21
-10
lines changed

3 files changed

+21
-10
lines changed
 

‎examples/histogram.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ function create_histogram(input)
1313
end
1414

1515
# This is a 1D histogram kernel where the histogramming happens on shmem
16-
@kernel function histogram_kernel!(histogram_output, input)
16+
@kernel implicit_validindex = false function histogram_kernel!(histogram_output, input)
1717
tid = @index(Global, Linear)
1818
lid = @index(Local, Linear)
1919

‎src/KernelAbstractions.jl

+7-3
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ synchronize(backend)
5050
```
5151
"""
5252
macro kernel(expr)
53-
return __kernel(expr, #=force_inbounds=# false)
53+
return __kernel(expr, #=force_inbounds=# false, #=implicit_validindex=# true)
5454
end
5555

5656
"""
@@ -71,8 +71,9 @@ This allows for two different configurations:
7171
"""
7272
macro kernel(ex...)
7373
if length(ex) == 1
74-
return __kernel(ex[1], false)
74+
return __kernel(ex[1], false, true)
7575
else
76+
implicit_validindex = true
7677
force_inbounds = false
7778
for i in 1:(length(ex) - 1)
7879
if ex[i] isa Expr && ex[i].head == :(=) &&
@@ -81,6 +82,9 @@ macro kernel(ex...)
8182
elseif ex[i] isa Expr && ex[i].head == :(=) &&
8283
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
8384
force_inbounds = ex[i].args[2]
85+
elseif ex[i] isa Expr && ex[i].head == :(=) &&
86+
ex[i].args[1] == :implicit_validindex && ex[i].args[2] isa Bool
87+
implicit_validindex = ex[i].args[2]
8488
else
8589
error(
8690
"Configuration should be of form:\n" *
@@ -90,7 +94,7 @@ macro kernel(ex...)
9094
)
9195
end
9296
end
93-
return __kernel(ex[end], force_inbounds)
97+
return __kernel(ex[end], force_inbounds, implicit_validindex)
9498
end
9599
end
96100

‎src/macros.jl

+13-6
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ function find_return(stmt)
1010
end
1111

1212
# XXX: Proper errors
13-
function __kernel(expr, force_inbounds = false)
13+
function __kernel(expr, force_inbounds = false, implicit_validindex = true)
1414
def = splitdef(expr)
1515
name = def[:name]
1616
args = def[:args]
@@ -30,7 +30,7 @@ function __kernel(expr, force_inbounds = false)
3030

3131
def_gpu = deepcopy(def)
3232
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
33-
transform_gpu!(def_gpu, constargs, force_inbounds)
33+
transform_gpu!(def_gpu, constargs, force_inbounds, implicit_validindex)
3434
gpu_function = combinedef(def_gpu)
3535

3636
# create constructor functions
@@ -50,7 +50,7 @@ end
5050

5151
# The easy case, transform the function for GPU execution
5252
# - mark constant arguments by applying `constify`.
53-
function transform_gpu!(def, constargs, force_inbounds)
53+
function transform_gpu!(def, constargs, force_inbounds, implicit_validindex)
5454
let_constargs = Expr[]
5555
for (i, arg) in enumerate(def[:args])
5656
if constargs[i]
@@ -64,11 +64,18 @@ function transform_gpu!(def, constargs, force_inbounds)
6464
@inbounds $(body)
6565
end
6666
end
67-
body = quote
68-
if $__validindex(__ctx__)
67+
if implicit_validindex
68+
body = quote
69+
if $__validindex(__ctx__)
70+
$(body)
71+
end
72+
return nothing
73+
end
74+
else
75+
body = quote
6976
$(body)
77+
return nothing
7078
end
71-
return nothing
7279
end
7380
def[:body] = Expr(
7481
:let,

0 commit comments

Comments
 (0)
Please sign in to comment.