-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathcuda_utils.py
More file actions
47 lines (37 loc) · 1.1 KB
/
cuda_utils.py
File metadata and controls
47 lines (37 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from numba import cuda
import numba as nb
import numpy as np
def calc_dims(shape, threadsperblock=(32, 32)):
    """Compute a CUDA launch configuration that covers ``shape``.

    For every axis of the thread block, the number of blocks is the ceiling
    of ``shape[axis] / threadsperblock[axis]``, so the grid spans at least
    ``shape[axis]`` threads along that axis.

    Args:
        shape: Indexable extents of the data. Only the first
            ``len(threadsperblock)`` entries are read; any further
            dimensions are ignored.
        threadsperblock: Threads per block along each axis. Defaults to
            ``(32, 32)``, the size this function always used before the
            parameter existed.

    Returns:
        A ``(blockspergrid, threadsperblock)`` pair of tuples.

    Raises:
        IndexError: If ``shape`` has fewer entries than ``threadsperblock``.
    """
    threadsperblock = tuple(threadsperblock)
    # Ceiling division without floats: (n + t - 1) // t.
    blockspergrid = tuple(
        (shape[axis] + (threads - 1)) // threads
        for axis, threads in enumerate(threadsperblock)
    )
    return blockspergrid, threadsperblock
@cuda.jit(device=True)
def add(a, b):
    """Return the component-wise sum of ``a`` and ``b``.

    Reads indices 0, 1 and 2 of both operands and packs the three sums
    with ``float3``.
    """
    x = a[0] + b[0]
    y = a[1] + b[1]
    z = a[2] + b[2]
    return float3(x, y, z)
@cuda.jit(device=True)
def diff(a, b):
    """Return the component-wise difference ``a - b``.

    Reads indices 0, 1 and 2 of both operands and packs the three
    differences with ``float3``.
    """
    x = a[0] - b[0]
    y = a[1] - b[1]
    z = a[2] - b[2]
    return float3(x, y, z)
@cuda.jit(device=True)
def mul(a, b):
    """Scale the first three components of ``a`` by the single factor ``b``.

    ``b`` is applied unchanged to every component; the products are packed
    with ``float3``.
    """
    x = a[0] * b
    y = a[1] * b
    z = a[2] * b
    return float3(x, y, z)
@cuda.jit(device=True)
def multColor(a, b):
    """Return the component-wise product of ``a`` and ``b``.

    Multiplies matching indices 0, 1 and 2 of the two operands and packs
    the products with ``float3``.
    """
    first = a[0] * b[0]
    second = a[1] * b[1]
    third = a[2] * b[2]
    return float3(first, second, third)
@cuda.jit(device=True)
def dot(a, b):
    """Return the dot product of the first three components of ``a`` and ``b``.

    The result is the plain sum of the three products; unlike the vector
    helpers it is not passed through ``float3``.
    """
    xx = a[0] * b[0]
    yy = a[1] * b[1]
    zz = a[2] * b[2]
    # Summed left to right, matching the original expression order.
    return xx + yy + zz
@cuda.jit(device=True)
def mix(a, b, k):
    """Blend two 3-vectors as ``a * k + b * (1 - k)``.

    Note the weighting: ``k`` is the weight of ``a`` and ``1 - k`` the
    weight of ``b``, so ``k == 1`` yields ``a`` and ``k == 0`` yields ``b``.
    Each term is built with ``mul`` and the two are combined with ``add``.
    """
    a_part = mul(a, k)
    b_part = mul(b, 1 - k)
    return add(a_part, b_part)
@cuda.jit(device=True)
def make_float3(a, offset):
    """Build a ``float3`` from three consecutive elements of ``a``.

    Reads ``a[offset]``, ``a[offset + 1]`` and ``a[offset + 2]`` and packs
    them with ``float3``.
    """
    x = a[offset]
    y = a[offset + 1]
    z = a[offset + 2]
    return float3(x, y, z)
@cuda.jit(device=True)
def invert(a):
    """Return ``a`` with each of its first three components negated.

    The negated values are packed with ``float3``.
    """
    neg_x = -a[0]
    neg_y = -a[1]
    neg_z = -a[2]
    return float3(neg_x, neg_y, neg_z)
@cuda.jit(device=True)
def float3(a, b, c):
    """Pack three values into a 3-tuple of ``np.float32``.

    Each argument is converted with ``np.float32`` before being placed in
    the returned tuple, in the order ``(a, b, c)``.
    """
    x = np.float32(a)
    y = np.float32(b)
    z = np.float32(c)
    return x, y, z