-
Notifications
You must be signed in to change notification settings - Fork 297
Expand file tree
/
Copy patharithmetic_example.py
More file actions
55 lines (43 loc) · 1.44 KB
/
arithmetic_example.py
File metadata and controls
55 lines (43 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!python
# !python
from __future__ import annotations
import numpy as np
import pycuda.driver as cuda
from pycuda import gpuarray
from pycuda.compiler import SourceModule
# Demo script: rescale a vector of random samples on the GPU with a
# hand-written CUDA kernel compiled through PyCUDA's SourceModule, and time
# the kernel launch with a pair of CUDA events.
start = cuda.Event()
end = cuda.Event()

N = 222341
values = np.random.randn(N)

# One thread per element.  Integer ceiling division keeps the block count an
# int (true division ``N / 1024`` yields a float under Python 3, which is not
# a valid grid dimension) and avoids launching a spare block when N happens
# to be an exact multiple of the block size.
threads_per_block = 1024
number_of_blocks = (N + threads_per_block - 1) // threads_per_block

# Kernel: in-place a[i] = (a[i] - minval) / denom for every i < N.
func_mod = SourceModule("""
// Needed to avoid name mangling so that PyCUDA can
// find the kernel function:
extern "C" {
__global__ void func(float *a, int N, float minval, int denom)
{
    // Global element index for a one-dimensional launch.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // The last block may extend past the end of the array.
    if (idx < N)
        a[idx] = (a[idx] - minval) / denom;
}
}
""", no_extern_c=True)
func = func_mod.get_function("func")

# The kernel works on 32-bit floats, so convert the float64 samples before
# copying them to the device.
x = np.asarray(values, np.float32)
x_gpu = gpuarray.to_gpu(x)

h_minval = np.float32(0)
h_denom = np.int32(255)

# Time only the kernel launch.  Scalar arguments are passed as numpy scalars
# whose widths match the kernel signature (int -> int32, float -> float32).
start.record()
func(
    x_gpu.gpudata,
    np.int32(N),
    h_minval,
    h_denom,
    block=(threads_per_block, 1, 1),
    grid=(number_of_blocks, 1, 1),
)
end.record()
# Wait for the end event so the elapsed time covers the finished kernel.
end.synchronize()
# The event interval is scaled by 1e-3 and reported in seconds below.
secs = start.time_till(end) * 1e-3

print("SourceModule time")
print(f"{secs:f}s")
print("x: ", x[N - 1])

# Copy the result back once and reuse it for the spot check below.
x_colors = x_gpu.get()
print(
    "Func(x): ", x_colors[N - 1],
    "Actual: ", (values[N - 1] - h_minval) / h_denom,
)