Skip to content

Commit 3bb80ac

Browse files
committed
Use POCL as a CPU backend
1 parent 31d5b44 commit 3bb80ac

25 files changed

+2541
-411
lines changed

.github/workflows/ci.yml

+1-5
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,8 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
version:
25-
- '1.6'
26-
- '1.7'
27-
- '1.8'
28-
- '1.9'
2925
- '1.10'
30-
- '~1.11.0-0'
26+
- '1.11'
3127
os:
3228
- ubuntu-latest
3329
- macOS-latest

Project.toml

+18-17
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
name = "KernelAbstractions"
22
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
33
authors = ["Valentin Churavy <[email protected]> and contributors"]
4-
version = "0.9.33"
4+
version = "0.10.0-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
88
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
9-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
9+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1010
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
11-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
11+
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1212
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
13+
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
1314
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
14-
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
15-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
15+
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
16+
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
1617
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
1718
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
19+
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
20+
21+
[weakdeps]
22+
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
23+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
24+
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
25+
26+
[extensions]
27+
EnzymeExt = "EnzymeCore"
28+
LinearAlgebraExt = "LinearAlgebra"
29+
SparseArraysExt = "SparseArrays"
1830

1931
[compat]
2032
Adapt = "0.4, 1.0, 2.0, 3.0, 4"
@@ -24,23 +36,12 @@ InteractiveUtils = "1.6"
2436
LinearAlgebra = "1.6"
2537
MacroTools = "0.5"
2638
PrecompileTools = "1"
27-
Requires = "1.3"
2839
SparseArrays = "<0.0.1, 1.6"
2940
StaticArrays = "0.12, 1.0"
3041
UUIDs = "<0.0.1, 1.6"
31-
julia = "1.6"
32-
33-
[extensions]
34-
EnzymeExt = "EnzymeCore"
35-
LinearAlgebraExt = "LinearAlgebra"
36-
SparseArraysExt = "SparseArrays"
42+
julia = "1.10"
3743

3844
[extras]
3945
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
4046
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
4147
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
42-
43-
[weakdeps]
44-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
45-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
46-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

examples/histogram.jl

+24-28
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,28 @@ function move(backend, input)
7474
end
7575

7676
@testset "histogram tests" begin
77-
if Base.VERSION < v"1.7.0" && !KernelAbstractions.isgpu(backend)
78-
@test_skip false
79-
else
80-
rand_input = [rand(1:128) for i in 1:1000]
81-
linear_input = [i for i in 1:1024]
82-
all_two = [2 for i in 1:512]
83-
84-
histogram_rand_baseline = create_histogram(rand_input)
85-
histogram_linear_baseline = create_histogram(linear_input)
86-
histogram_two_baseline = create_histogram(all_two)
87-
88-
rand_input = move(backend, rand_input)
89-
linear_input = move(backend, linear_input)
90-
all_two = move(backend, all_two)
91-
92-
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
93-
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
94-
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
95-
96-
histogram!(rand_histogram, rand_input)
97-
histogram!(linear_histogram, linear_input)
98-
histogram!(two_histogram, all_two)
99-
KernelAbstractions.synchronize(CPU())
100-
101-
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
102-
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
103-
@test isapprox(Array(two_histogram), histogram_two_baseline)
104-
end
77+
rand_input = [rand(1:128) for i in 1:1000]
78+
linear_input = [i for i in 1:1024]
79+
all_two = [2 for i in 1:512]
80+
81+
histogram_rand_baseline = create_histogram(rand_input)
82+
histogram_linear_baseline = create_histogram(linear_input)
83+
histogram_two_baseline = create_histogram(all_two)
84+
85+
rand_input = move(backend, rand_input)
86+
linear_input = move(backend, linear_input)
87+
all_two = move(backend, all_two)
88+
89+
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
90+
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
91+
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
92+
93+
histogram!(rand_histogram, rand_input)
94+
histogram!(linear_histogram, linear_input)
95+
histogram!(two_histogram, all_two)
96+
KernelAbstractions.synchronize(CPU())
97+
98+
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
99+
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
100+
@test isapprox(Array(two_histogram), histogram_two_baseline)
105101
end

examples/naive_transpose.jl

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ function naive_transpose!(a, b)
1515
end
1616
backend = get_backend(a)
1717
@assert get_backend(b) == backend
18-
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
19-
kernel! = naive_transpose_kernel!(backend, groupsize)
18+
kernel! = naive_transpose_kernel!(backend, 256)
2019
kernel!(a, b, ndrange = size(a))
2120
return
2221
end

src/KernelAbstractions.jl

+32-48
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ synchronize(backend)
5050
```
5151
"""
5252
macro kernel(expr)
53-
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
53+
return __kernel(expr, #=force_inbounds=# false)
5454
end
5555

5656
"""
@@ -65,17 +65,19 @@ This allows for two different configurations:
6565
6666
!!! warning
6767
This is an experimental feature.
68+
69+
!!! note
70+
`cpu={true, false}` is deprecated for KernelAbstractions 1.0
6871
"""
6972
macro kernel(ex...)
7073
if length(ex) == 1
71-
return __kernel(ex[1], true, false)
74+
return __kernel(ex[1], false)
7275
else
73-
generate_cpu = true
7476
force_inbounds = false
7577
for i in 1:(length(ex) - 1)
7678
if ex[i] isa Expr && ex[i].head == :(=) &&
7779
ex[i].args[1] == :cpu && ex[i].args[2] isa Bool
78-
generate_cpu = ex[i].args[2]
80+
# deprecated: the `cpu` option is still accepted but is ignored
7981
elseif ex[i] isa Expr && ex[i].head == :(=) &&
8082
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
8183
force_inbounds = ex[i].args[2]
@@ -88,7 +90,7 @@ macro kernel(ex...)
8890
)
8991
end
9092
end
91-
return __kernel(ex[end], generate_cpu, force_inbounds)
93+
return __kernel(ex[end], force_inbounds)
9294
end
9395
end
9496

@@ -184,6 +186,8 @@ After releasing the memory of an array, it should no longer be accessed.
184186
"""
185187
function unsafe_free! end
186188

189+
unsafe_free!(::AbstractArray) = return
190+
187191
###
188192
# Kernel language
189193
# - @localmem
@@ -248,6 +252,9 @@ For storage that only persists between `@synchronize` statements, an `MArray` ca
248252
instead.
249253
250254
See also [`@uniform`](@ref).
255+
256+
!!! note
257+
`@private` is deprecated for KernelAbstractions 1.0
251258
"""
252259
macro private(T, dims)
253260
if dims isa Integer
@@ -263,6 +270,9 @@ end
263270
264271
Creates a private local of `mem` per item in the workgroup. This can be safely used
265272
across [`@synchronize`](@ref) statements.
273+
274+
!!! note
275+
`@private` is deprecated for KernelAbstractions 1.0
266276
"""
267277
macro private(expr)
268278
return esc(expr)
@@ -273,6 +283,9 @@ end
273283
274284
`expr` is evaluated outside the workitem scope. This is useful for variable declarations
275285
that span workitems, or are reused across `@synchronize` statements.
286+
287+
!!! note
288+
`@uniform` is deprecated for KernelAbstractions 1.0
276289
"""
277290
macro uniform(value)
278291
return esc(value)
@@ -324,6 +337,8 @@ Access the hidden context object used by KernelAbstractions.
324337
!!! warning
325338
Only valid to be used from a kernel with `cpu=false`.
326339
340+
!!! note
341+
`@context` will be supported on all backends in KernelAbstractions 1.0
327342
```
328343
function f(@context, a)
329344
I = @index(Global, Linear)
@@ -472,31 +487,11 @@ Abstract type for all GPU based KernelAbstractions backends.
472487
473488
!!! note
474489
New backend implementations **must** sub-type this abstract type.
475-
"""
476-
abstract type GPU <: Backend end
477-
478-
"""
479-
CPU(; static=false)
480-
481-
Instantiate a CPU (multi-threaded) backend.
482-
483-
## Options:
484-
- `static`: Uses a static thread assignment, this can be beneficial for NUMA aware code.
485-
Defaults to false.
486-
"""
487-
struct CPU <: Backend
488-
static::Bool
489-
CPU(; static::Bool = false) = new(static)
490-
end
491-
492-
"""
493-
isgpu(::Backend)::Bool
494490
495-
Returns true for all [`GPU`](@ref) backends.
491+
!!! note
492+
`GPU` will be removed in KernelAbstractions v1.0
496493
"""
497-
isgpu(::GPU) = true
498-
isgpu(::CPU) = false
499-
494+
abstract type GPU <: Backend end
500495

501496
"""
502497
get_backend(A::AbstractArray)::Backend
@@ -512,12 +507,9 @@ function get_backend end
512507
# Should cover SubArray, ReshapedArray, ReinterpretArray, Hermitian, AbstractTriangular, etc.:
513508
get_backend(A::AbstractArray) = get_backend(parent(A))
514509

515-
get_backend(::Array) = CPU()
516-
517510
# Define:
518511
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
519512
# adapt_storage(::Backend, a::BackendArray) = a
520-
Adapt.adapt_storage(::CPU, a::Array) = a
521513

522514
"""
523515
allocate(::Backend, Type, dims...)::AbstractArray
@@ -737,7 +729,7 @@ Partition a kernel for the given ndrange and workgroupsize.
737729
return iterspace, dynamic
738730
end
739731

740-
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
732+
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
741733
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
742734
end
743735

@@ -754,6 +746,10 @@ include("compiler.jl")
754746
function __workitems_iterspace end
755747
function __validindex end
756748

749+
# for reflection
750+
function mkcontext end
751+
function launch_config end
752+
757753
include("macros.jl")
758754

759755
###
@@ -823,8 +819,11 @@ end
823819
end
824820

825821
# CPU backend
822+
include("pocl/pocl.jl")
823+
using .POCL
824+
export POCLBackend
826825

827-
include("cpu.jl")
826+
const CPU = POCLBackend
828827

829828
# precompile
830829
PrecompileTools.@compile_workload begin
@@ -838,19 +837,4 @@ PrecompileTools.@compile_workload begin
838837
end
839838
end
840839

841-
if !isdefined(Base, :get_extension)
842-
using Requires
843-
end
844-
845-
@static if !isdefined(Base, :get_extension)
846-
function __init__()
847-
@require EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" include("../ext/EnzymeExt.jl")
848-
end
849-
end
850-
851-
if !isdefined(Base, :get_extension)
852-
include("../ext/LinearAlgebraExt.jl")
853-
include("../ext/SparseArraysExt.jl")
854-
end
855-
856840
end #module

src/cpu.jl

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
unsafe_free!(::AbstractArray) = return
21
synchronize(::CPU) = nothing
32

43
allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)

0 commit comments

Comments
 (0)