|
# Field-less backend tag: the value `GPUArrays.backend` returns for `ROCArray` types and
# the first argument the `GPUArrays.gpu_call` method in this file dispatches on.
struct ROCArrayBackend <: AbstractGPUBackend
end
2 | | - |
# Field-less context tag: `gpu_call` passes a fresh instance as the first argument of
# every launched kernel, and the device-side GPUArrays methods in this file dispatch on it.
struct ROCKernelContext <: AbstractKernelContext
end
4 | | - |
# Launch `f` through `@roc` for the ROC backend.
#
# `blocks` is forwarded as `gridsize`, `threads` as `groupsize`, and the required `name`
# keyword as the `name` launch option. The kernel is invoked as `f(ctx, args...)`, where
# `ctx` is a newly constructed `ROCKernelContext`. Whatever `@roc` evaluates to is returned.
@inline function GPUArrays.gpu_call(
    ::ROCArrayBackend, f, args, threads::Int, blocks::Int; name::Maybe{String},
)
    ctx = ROCKernelContext()
    return @roc gridsize=blocks groupsize=threads name=name f(ctx, args...)
end
10 | | - |
11 | | -# indexing |
12 | | - |
# Generate the GPUArrays index/dimension queries for `ROCKernelContext`. Each generated
# method calls the paired AMDGPU function and returns the `x` field of its result.
for (gpuarrays_fn, amdgpu_fn) in (
    :blockidx => :blockIdx,
    :blockdim => :blockDim,
    :threadidx => :threadIdx,
    :griddim => :gridGroupDim,
)
    @eval @inline function GPUArrays.$gpuarrays_fn(::ROCKernelContext)
        return AMDGPU.$amdgpu_fn().x
    end
end
21 | | - |
22 | | -# math |
23 | | - |
# Generate the GPUArrays math methods for `ROCKernelContext`: each one ignores the
# context argument and forwards `x` to the same-named function visible in this module.
for mathfn in (:cos, :sin, :sqrt, :log)
    @eval @inline GPUArrays.$mathfn(::ROCKernelContext, x) = $mathfn(x)
end
28 | | - |
29 | | -# memory |
30 | | - |
# Build a `ROCDeviceArray` of shape `dims` over a pointer obtained from
# `AMDGPU.Device.alloc_special`.
#
# The allocation is requested with the `id` tag, element type `T`, the `AMDGPU.AS.Local`
# address-space tag, and a length of `prod(dims)` elements; all of these are passed in
# the type domain via `Val`.
@inline function GPUArrays.LocalMemory(
    ::ROCKernelContext, ::Type{T}, ::Val{dims}, ::Val{id},
) where {T,dims,id}
    id_tag = Val{id}()
    space_tag = Val{AMDGPU.AS.Local}()
    len_tag = Val{prod(dims)}()
    local_ptr = AMDGPU.Device.alloc_special(id_tag, T, space_tag, len_tag)
    return ROCDeviceArray(dims, local_ptr)
end
35 | | - |
36 | | -# synchronization |
37 | | - |
# Call `sync_workgroup()` and discard its result; this method always returns `nothing`.
@inline function GPUArrays.synchronize_threads(::ROCKernelContext)
    sync_workgroup()
    return nothing
end
42 | | - |
# Return the `device` field of the value obtained by dereferencing `x.buf` with `[]`.
function GPUArrays.device(x::ROCArray)
    buffer = x.buf[]
    return buffer.device
end
44 | | - |
# Map any `ROCArray` type (including parameterized subtypes) to a `ROCArrayBackend` instance.
function GPUArrays.backend(::Type{<:ROCArray})
    return ROCArrayBackend()
end
| 1 | +import KernelAbstractions |
46 | 2 |
|
47 | 3 | function GPUArrays.derive( |
48 | 4 | ::Type{T}, x::ROCArray, dims::Dims{N}, offset::Int, |
|
0 commit comments