-
Notifications
You must be signed in to change notification settings - Fork 94
Expand file tree
/
Copy pathkernel.cpp
More file actions
110 lines (96 loc) · 3.62 KB
/
kernel.cpp
File metadata and controls
110 lines (96 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <occa/internal/modes/cuda/kernel.hpp>
#include <occa/internal/modes/cuda/device.hpp>
#include <occa/internal/modes/cuda/utils.hpp>
#include <occa/internal/utils/env.hpp>
#include <occa/internal/io.hpp>
#include <occa/core/base.hpp>
namespace occa {
namespace cuda {
// Builds a kernel from a loaded CUDA module only.
//   modeDevice_     : device the kernel is attached to (forwarded to the base class)
//   name_           : kernel name (forwarded to the base class)
//   sourceFilename_ : source file name (forwarded to the base class)
//   cuModule_       : module handle; a non-NULL handle is unloaded by ~kernel()
//   properties_     : kernel properties; "sharedMemBytes" is read with a default of 0
// No function handle is supplied here, so cuFunction starts out NULL.
kernel::kernel(modeDevice_t *modeDevice_,
               const std::string &name_,
               const std::string &sourceFilename_,
               CUmodule cuModule_,
               const occa::json &properties_) :
  occa::launchedModeKernel_t(modeDevice_,
                             name_,
                             sourceFilename_,
                             properties_),
  cuModule(cuModule_),
  cuFunction(NULL)
{
  // Dynamic shared memory size later handed to the launch in deviceRun()
  sharedMemBytes = properties_.get("sharedMemBytes", 0);
}
// Builds a kernel from an already-resolved CUDA function handle.
//   modeDevice_     : device the kernel is attached to (forwarded to the base class)
//   name_           : kernel name (forwarded to the base class)
//   sourceFilename_ : source file name (forwarded to the base class)
//   cuFunction_     : function handle used for attribute queries and launches
//   properties_     : kernel properties; "sharedMemBytes" is read with a default of 0
// cuModule is left NULL, so ~kernel() has no module to unload for this instance.
kernel::kernel(modeDevice_t *modeDevice_,
               const std::string &name_,
               const std::string &sourceFilename_,
               CUfunction cuFunction_,
               const occa::json &properties_) :
  occa::launchedModeKernel_t(modeDevice_,
                             name_,
                             sourceFilename_,
                             properties_),
  cuModule(NULL),
  cuFunction(cuFunction_)
{
  // Dynamic shared memory size later handed to the launch in deviceRun()
  sharedMemBytes = properties_.get("sharedMemBytes", 0);
}
// Builds a kernel from both a CUDA module and a function handle.
//   modeDevice_     : device the kernel is attached to (forwarded to the base class)
//   name_           : kernel name (forwarded to the base class)
//   sourceFilename_ : source file name (forwarded to the base class)
//   cuModule_       : module handle; a non-NULL handle is unloaded by ~kernel()
//   cuFunction_     : function handle used for attribute queries and launches
//   properties_     : kernel properties; "sharedMemBytes" is read with a default of 0
kernel::kernel(modeDevice_t *modeDevice_,
               const std::string &name_,
               const std::string &sourceFilename_,
               CUmodule cuModule_,
               CUfunction cuFunction_,
               const occa::json &properties_) :
  occa::launchedModeKernel_t(modeDevice_,
                             name_,
                             sourceFilename_,
                             properties_),
  cuModule(cuModule_),
  cuFunction(cuFunction_)
{
  // Dynamic shared memory size later handed to the launch in deviceRun()
  sharedMemBytes = properties_.get("sharedMemBytes", 0);
}
// Unloads the CUDA module held by this kernel, if there is one.
// Kernels constructed without a module (cuModule == NULL) have nothing to release.
kernel::~kernel() {
  if (!cuModule) {
    return;
  }

  OCCA_CUDA_DESTRUCTOR_ERROR(
    "Kernel (" + name + ") : Unloading Module",
    cuModuleUnload(cuModule)
  );

  // Clear the handle so it cannot be unloaded a second time
  cuModule = NULL;
}
// Returns the CUDA stream reported by the device this kernel is attached to.
CUstream& kernel::getCuStream() const {
  // modeDevice is stored as the generic device type; view it as the CUDA device
  device *cudaDevice = (device*) modeDevice;
  return cudaDevice->getCuStream();
}
// Number of launch dimensions reported for CUDA kernels: always 3
// (deviceRun() passes x, y and z extents to the launch).
int kernel::maxDims() const {
  const int dimCount = 3;
  return dimCount;
}
// Reports occa::UDIM_DEFAULT for each of the three outer dimensions.
// NOTE(review): UDIM_DEFAULT presumably means "no kernel-specific limit" - confirm.
dim kernel::maxOuterDims() const {
  dim outerLimits(occa::UDIM_DEFAULT,
                  occa::UDIM_DEFAULT,
                  occa::UDIM_DEFAULT);
  return outerLimits;
}
// Returns the maximum inner-dimension size for THIS kernel's function.
//
// The x component is CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK as reported by
// cuFuncGetAttribute for cuFunction; the remaining components keep whatever
// dim(0) initializes them to.
//
// The attribute belongs to the individual CUfunction, so it is queried on
// every call. The result used to live in a function-local static, which made
// every kernel instance report the limit of whichever kernel was queried
// first, and let concurrent callers write the shared static unsynchronized.
//
// Raises through OCCA_CUDA_ERROR if the driver query fails (for example when
// cuFunction is NULL).
dim kernel::maxInnerDims() const {
  int maxSize = 0;
  OCCA_CUDA_ERROR("Kernel: Getting Maximum Inner-Dim Size",
                  cuFuncGetAttribute(&maxSize,
                                     CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
                                     cuFunction));

  dim maxInnerDims_(0);
  maxInnerDims_.x = (udim_t) maxSize;
  return maxInnerDims_;
}
void kernel::deviceRun() const {
device *devicePtr = (device*) modeDevice;
const int args = (int) arguments.size();
if (!args) {
vArgs.resize(1);
} else if ((int) vArgs.size() < args) {
vArgs.resize(args);
}
// Set arguments
for (int i = 0; i < args; ++i) {
vArgs[i] = arguments[i].ptr();
// Set a proper NULL pointer
if (!vArgs[i]) {
vArgs[i] = devicePtr->getNullPtr();
}
}
devicePtr->setCudaContext();
OCCA_CUDA_ERROR("Set max dynamic shm", cuFuncSetAttribute(cuFunction, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, sharedMemBytes));
OCCA_CUDA_ERROR("Launching Kernel",
cuLaunchKernel(cuFunction,
outerDims.x, outerDims.y, outerDims.z,
innerDims.x, innerDims.y, innerDims.z,
sharedMemBytes, getCuStream(),
&(vArgs[0]), NULL));
}
}
}