Skip to content

Commit f038d8c

Browse files
committed
Use POCL as a CPU backend
1 parent b435bb2 commit f038d8c

22 files changed

+2534
-494
lines changed

.github/workflows/ci.yml

+1-5
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,8 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
version:
25-
- '1.6'
26-
- '1.7'
27-
- '1.8'
28-
- '1.9'
2925
- '1.10'
30-
- '~1.11.0-0'
26+
- '1.11'
3127
os:
3228
- ubuntu-latest
3329
- macOS-latest

Project.toml

+18-17
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
name = "KernelAbstractions"
22
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
33
authors = ["Valentin Churavy <[email protected]> and contributors"]
4-
version = "0.9.33"
4+
version = "0.10.0-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
88
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
9-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
9+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1010
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
11-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
11+
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1212
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
13+
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
1314
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
14-
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
15-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
15+
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
16+
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
1617
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
1718
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
19+
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
20+
21+
[weakdeps]
22+
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
23+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
24+
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
25+
26+
[extensions]
27+
EnzymeExt = "EnzymeCore"
28+
LinearAlgebraExt = "LinearAlgebra"
29+
SparseArraysExt = "SparseArrays"
1830

1931
[compat]
2032
Adapt = "0.4, 1.0, 2.0, 3.0, 4"
@@ -24,23 +36,12 @@ InteractiveUtils = "1.6"
2436
LinearAlgebra = "1.6"
2537
MacroTools = "0.5"
2638
PrecompileTools = "1"
27-
Requires = "1.3"
2839
SparseArrays = "<0.0.1, 1.6"
2940
StaticArrays = "0.12, 1.0"
3041
UUIDs = "<0.0.1, 1.6"
31-
julia = "1.6"
32-
33-
[extensions]
34-
EnzymeExt = "EnzymeCore"
35-
LinearAlgebraExt = "LinearAlgebra"
36-
SparseArraysExt = "SparseArrays"
42+
julia = "1.10"
3743

3844
[extras]
3945
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
4046
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
4147
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
42-
43-
[weakdeps]
44-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
45-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
46-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

examples/histogram.jl

+24-28
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,28 @@ function move(backend, input)
7474
end
7575

7676
@testset "histogram tests" begin
77-
if Base.VERSION < v"1.7.0" && !KernelAbstractions.isgpu(backend)
78-
@test_skip false
79-
else
80-
rand_input = [rand(1:128) for i in 1:1000]
81-
linear_input = [i for i in 1:1024]
82-
all_two = [2 for i in 1:512]
83-
84-
histogram_rand_baseline = create_histogram(rand_input)
85-
histogram_linear_baseline = create_histogram(linear_input)
86-
histogram_two_baseline = create_histogram(all_two)
87-
88-
rand_input = move(backend, rand_input)
89-
linear_input = move(backend, linear_input)
90-
all_two = move(backend, all_two)
91-
92-
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
93-
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
94-
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
95-
96-
histogram!(rand_histogram, rand_input)
97-
histogram!(linear_histogram, linear_input)
98-
histogram!(two_histogram, all_two)
99-
KernelAbstractions.synchronize(CPU())
100-
101-
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
102-
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
103-
@test isapprox(Array(two_histogram), histogram_two_baseline)
104-
end
77+
rand_input = [rand(1:128) for i in 1:1000]
78+
linear_input = [i for i in 1:1024]
79+
all_two = [2 for i in 1:512]
80+
81+
histogram_rand_baseline = create_histogram(rand_input)
82+
histogram_linear_baseline = create_histogram(linear_input)
83+
histogram_two_baseline = create_histogram(all_two)
84+
85+
rand_input = move(backend, rand_input)
86+
linear_input = move(backend, linear_input)
87+
all_two = move(backend, all_two)
88+
89+
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
90+
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
91+
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
92+
93+
histogram!(rand_histogram, rand_input)
94+
histogram!(linear_histogram, linear_input)
95+
histogram!(two_histogram, all_two)
96+
KernelAbstractions.synchronize(CPU())
97+
98+
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
99+
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
100+
@test isapprox(Array(two_histogram), histogram_two_baseline)
105101
end

src/KernelAbstractions.jl

+32-48
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ synchronize(backend)
5050
```
5151
"""
5252
macro kernel(expr)
53-
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
53+
return __kernel(expr, #=force_inbounds=# false)
5454
end
5555

5656
"""
@@ -65,17 +65,19 @@ This allows for two different configurations:
6565
6666
!!! warning
6767
This is an experimental feature.
68+
69+
!!! note
70+
The `cpu={true, false}` option is deprecated ahead of KernelAbstractions 1.0; it is still parsed but no longer has any effect.
6871
"""
6972
macro kernel(ex...)
7073
if length(ex) == 1
71-
return __kernel(ex[1], true, false)
74+
return __kernel(ex[1], false)
7275
else
73-
generate_cpu = true
7476
force_inbounds = false
7577
for i in 1:(length(ex) - 1)
7678
if ex[i] isa Expr && ex[i].head == :(=) &&
7779
ex[i].args[1] == :cpu && ex[i].args[2] isa Bool
78-
generate_cpu = ex[i].args[2]
80+
# deprecated: `cpu=...` is still accepted for compatibility, but its value is ignored
7981
elseif ex[i] isa Expr && ex[i].head == :(=) &&
8082
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
8183
force_inbounds = ex[i].args[2]
@@ -88,7 +90,7 @@ macro kernel(ex...)
8890
)
8991
end
9092
end
91-
return __kernel(ex[end], generate_cpu, force_inbounds)
93+
return __kernel(ex[end], force_inbounds)
9294
end
9395
end
9496

@@ -184,6 +186,8 @@ After releasing the memory of an array, it should no longer be accessed.
184186
"""
185187
function unsafe_free! end
186188

189+
unsafe_free!(::AbstractArray) = return
190+
187191
###
188192
# Kernel language
189193
# - @localmem
@@ -248,6 +252,9 @@ For storage that only persists between `@synchronize` statements, an `MArray` ca
248252
instead.
249253
250254
See also [`@uniform`](@ref).
255+
256+
!!! note
257+
`@private` is deprecated for KernelAbstractions 1.0
251258
"""
252259
macro private(T, dims)
253260
if dims isa Integer
@@ -263,6 +270,9 @@ end
263270
264271
Creates a private local of `mem` per item in the workgroup. This can be safely used
265272
across [`@synchronize`](@ref) statements.
273+
274+
!!! note
275+
`@private` is deprecated for KernelAbstractions 1.0
266276
"""
267277
macro private(expr)
268278
return esc(expr)
@@ -273,6 +283,9 @@ end
273283
274284
`expr` is evaluated outside the workitem scope. This is useful for variable declarations
275285
that span workitems, or are reused across `@synchronize` statements.
286+
287+
!!! note
288+
`@uniform` is deprecated for KernelAbstractions 1.0
276289
"""
277290
macro uniform(value)
278291
return esc(value)
@@ -316,6 +329,8 @@ Access the hidden context object used by KernelAbstractions.
316329
!!! warning
317330
Only valid to be used from a kernel with `cpu=false`.
318331
332+
!!! note
333+
`@context` will be supported on all backends in KernelAbstractions 1.0
319334
```
320335
function f(@context, a)
321336
I = @index(Global, Linear)
@@ -464,31 +479,11 @@ Abstract type for all GPU based KernelAbstractions backends.
464479
465480
!!! note
466481
New backend implementations **must** sub-type this abstract type.
467-
"""
468-
abstract type GPU <: Backend end
469-
470-
"""
471-
CPU(; static=false)
472-
473-
Instantiate a CPU (multi-threaded) backend.
474-
475-
## Options:
476-
- `static`: Uses a static thread assignment, this can be beneficial for NUMA aware code.
477-
Defaults to false.
478-
"""
479-
struct CPU <: Backend
480-
static::Bool
481-
CPU(; static::Bool = false) = new(static)
482-
end
483-
484-
"""
485-
isgpu(::Backend)::Bool
486482
487-
Returns true for all [`GPU`](@ref) backends.
483+
!!! note
484+
`GPU` will be removed in KernelAbstractions v1.0
488485
"""
489-
isgpu(::GPU) = true
490-
isgpu(::CPU) = false
491-
486+
abstract type GPU <: Backend end
492487

493488
"""
494489
get_backend(A::AbstractArray)::Backend
@@ -504,12 +499,9 @@ function get_backend end
504499
# Should cover SubArray, ReshapedArray, ReinterpretArray, Hermitian, AbstractTriangular, etc.:
505500
get_backend(A::AbstractArray) = get_backend(parent(A))
506501

507-
get_backend(::Array) = CPU()
508-
509502
# Define:
510503
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
511504
# adapt_storage(::Backend, a::BackendArray) = a
512-
Adapt.adapt_storage(::CPU, a::Array) = a
513505

514506
"""
515507
allocate(::Backend, Type, dims...)::AbstractArray
@@ -729,7 +721,7 @@ Partition a kernel for the given ndrange and workgroupsize.
729721
return iterspace, dynamic
730722
end
731723

732-
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
724+
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
733725
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
734726
end
735727

@@ -746,6 +738,10 @@ include("compiler.jl")
746738
function __workitems_iterspace end
747739
function __validindex end
748740

741+
# for reflection
742+
function mkcontext end
743+
function launch_config end
744+
749745
include("macros.jl")
750746

751747
###
@@ -815,8 +811,11 @@ end
815811
end
816812

817813
# CPU backend
814+
include("pocl/pocl.jl")
815+
using .POCL
816+
export POCLBackend
818817

819-
include("cpu.jl")
818+
const CPU = POCLBackend
820819

821820
# precompile
822821
PrecompileTools.@compile_workload begin
@@ -830,19 +829,4 @@ PrecompileTools.@compile_workload begin
830829
end
831830
end
832831

833-
if !isdefined(Base, :get_extension)
834-
using Requires
835-
end
836-
837-
@static if !isdefined(Base, :get_extension)
838-
function __init__()
839-
@require EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" include("../ext/EnzymeExt.jl")
840-
end
841-
end
842-
843-
if !isdefined(Base, :get_extension)
844-
include("../ext/LinearAlgebraExt.jl")
845-
include("../ext/SparseArraysExt.jl")
846-
end
847-
848832
end #module

src/cpu.jl

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
unsafe_free!(::AbstractArray) = return
21
synchronize(::CPU) = nothing
32

43
allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)

0 commit comments

Comments
 (0)