From 42b7e3b73e5aaa02cb0ce29210eade1a0493a958 Mon Sep 17 00:00:00 2001 From: Nikshay Shrivastava Date: Wed, 25 Feb 2026 00:00:30 -0800 Subject: [PATCH 1/4] Add cuDLA bindings Generated from cudla.h using cybind. Files added: - cycudla.pxd/pyx: Cython layer exposing C header types and functions - cudla.pxd/pyx: lowpp Python layer with POD classes, enums, and wrappers - _internal/cudla.pxd, cudla_linux.pyx, cudla_windows.pyx: dynamic library loading - docs/source/module/cudla.rst: API documentation - tests/cudla/: pytest unit tests for enums, POD types, error handling, API surface, and hardware-gated function tests (verified on L4T/Orin) Build/CI updates: - pyproject.toml: added cudla to cuda-toolkit optional dependencies - .github/actions/fetch_ctk/action.yml: added libcudla to CTK components - docs/source/api.rst: added cudla to toctree --- .github/actions/fetch_ctk/action.yml | 2 +- .../cuda/bindings/_internal/cudla.pxd | 22 + .../cuda/bindings/_internal/cudla_linux.pyx | 382 ++++ .../cuda/bindings/_internal/cudla_windows.pyx | 345 ++++ cuda_bindings/cuda/bindings/cudla.pxd | 49 + cuda_bindings/cuda/bindings/cudla.pyx | 1826 +++++++++++++++++ cuda_bindings/cuda/bindings/cycudla.pxd | 155 ++ cuda_bindings/cuda/bindings/cycudla.pyx | 61 + cuda_bindings/docs/source/api.rst | 1 + cuda_bindings/docs/source/module/cudla.rst | 67 + cuda_bindings/pyproject.toml | 2 +- cuda_bindings/tests/cudla/conftest.py | 13 + .../tests/cudla/test_cudla_bindings.py | 305 +++ 13 files changed, 3228 insertions(+), 2 deletions(-) create mode 100644 cuda_bindings/cuda/bindings/_internal/cudla.pxd create mode 100644 cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx create mode 100644 cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx create mode 100644 cuda_bindings/cuda/bindings/cudla.pxd create mode 100644 cuda_bindings/cuda/bindings/cudla.pyx create mode 100644 cuda_bindings/cuda/bindings/cycudla.pxd create mode 100644 cuda_bindings/cuda/bindings/cycudla.pyx create mode 100644 
cuda_bindings/docs/source/module/cudla.rst create mode 100644 cuda_bindings/tests/cudla/conftest.py create mode 100644 cuda_bindings/tests/cudla/test_cudla_bindings.py diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index a150628e0f9..d0f088e47bd 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -14,7 +14,7 @@ inputs: cuda-components: description: "A list of the CTK components to install as a comma-separated list. e.g. 'cuda_nvcc,cuda_nvrtc,cuda_cudart'" required: false - default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_cupti,libnvjitlink,libcufile,libnvfatbin" + default: "cuda_nvcc,cuda_cudart,cuda_crt,libnvvm,cuda_nvrtc,cuda_profiler_api,cuda_cccl,cuda_cupti,libnvjitlink,libcufile,libnvfatbin,libcudla" cuda-path: description: "where the CTK components will be installed to, relative to $PWD" required: false diff --git a/cuda_bindings/cuda/bindings/_internal/cudla.pxd b/cuda_bindings/cuda/bindings/_internal/cudla.pxd new file mode 100644 index 00000000000..57fef6a7323 --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cudla.pxd @@ -0,0 +1,22 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. 
+ +from ..cycudla cimport * + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef cudlaStatus _cudlaGetVersion(uint64_t* const version) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaDeviceGetCount(uint64_t* const pNumDevices) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaCreateDevice(const uint64_t device, cudlaDevHandle* const devHandle, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaMemRegister(const cudlaDevHandle devHandle, const uint64_t* const ptr, const size_t size, uint64_t** const devPtr, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaModuleLoadFromMemory(const cudlaDevHandle devHandle, const uint8_t* const pModule, const size_t moduleSize, cudlaModule* const hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaModuleGetAttributes(const cudlaModule hModule, const cudlaModuleAttributeType attrType, cudlaModuleAttribute* const attribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaModuleUnload(const cudlaModule hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaSubmitTask(const cudlaDevHandle devHandle, const cudlaTask* const ptrToTasks, const uint32_t numTasks, void* const stream, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaDeviceGetAttribute(const cudlaDevHandle devHandle, const cudlaDevAttributeType attrib, cudlaDevAttribute* const pAttribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaMemUnregister(const cudlaDevHandle devHandle, const uint64_t* const devPtr) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaGetLastError(const 
cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaDestroyDevice(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus _cudlaSetTaskTimeoutInMs(const cudlaDevHandle devHandle, const uint32_t timeout) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx new file mode 100644 index 00000000000..6a0f4f2984b --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx @@ -0,0 +1,382 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. + +from libc.stdint cimport intptr_t, uintptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + + +############################################################################### +# Extern +############################################################################### + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "" nogil: + void* dlopen(const char*, int) + char* dlerror() + void* dlsym(void*, const char*) + int dlclose(void*) + + enum: + RTLD_LAZY + RTLD_NOW + RTLD_GLOBAL + RTLD_LOCAL + + const void* RTLD_DEFAULT 'RTLD_DEFAULT' + +cdef int get_cuda_version(): + cdef void* handle = NULL + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = dlopen('libcuda.so.1', RTLD_NOW | RTLD_GLOBAL) + if handle == NULL: + err_msg = dlerror() + raise NotSupportedError(f'CUDA driver is not found ({err_msg.decode()})') + cuDriverGetVersion = dlsym(handle, "cuDriverGetVersion") + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in libcuda.so.1') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion 
returned error code {err}') + + return driver_ver + + + +############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_cudla_init = False + +cdef void* __cudlaGetVersion = NULL +cdef void* __cudlaDeviceGetCount = NULL +cdef void* __cudlaCreateDevice = NULL +cdef void* __cudlaMemRegister = NULL +cdef void* __cudlaModuleLoadFromMemory = NULL +cdef void* __cudlaModuleGetAttributes = NULL +cdef void* __cudlaModuleUnload = NULL +cdef void* __cudlaSubmitTask = NULL +cdef void* __cudlaDeviceGetAttribute = NULL +cdef void* __cudlaMemUnregister = NULL +cdef void* __cudlaGetLastError = NULL +cdef void* __cudlaDestroyDevice = NULL +cdef void* __cudlaSetTaskTimeoutInMs = NULL + + +cdef void* load_library() except* with gil: + cdef uintptr_t handle = load_nvidia_dynamic_lib("cudla")._handle_uint + return handle + + +cdef int _init_cudla() except -1 nogil: + global __py_cudla_init + cdef void* handle = NULL + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_cudla_init: + return 0 + + # Load function + global __cudlaGetVersion + __cudlaGetVersion = dlsym(RTLD_DEFAULT, 'cudlaGetVersion') + if __cudlaGetVersion == NULL: + if handle == NULL: + handle = load_library() + __cudlaGetVersion = dlsym(handle, 'cudlaGetVersion') + + global __cudlaDeviceGetCount + __cudlaDeviceGetCount = dlsym(RTLD_DEFAULT, 'cudlaDeviceGetCount') + if __cudlaDeviceGetCount == NULL: + if handle == NULL: + handle = load_library() + __cudlaDeviceGetCount = dlsym(handle, 'cudlaDeviceGetCount') + + global __cudlaCreateDevice + __cudlaCreateDevice = dlsym(RTLD_DEFAULT, 'cudlaCreateDevice') + if __cudlaCreateDevice == NULL: + if handle == NULL: + handle = load_library() + __cudlaCreateDevice = dlsym(handle, 'cudlaCreateDevice') + + global __cudlaMemRegister + __cudlaMemRegister = dlsym(RTLD_DEFAULT, 
'cudlaMemRegister') + if __cudlaMemRegister == NULL: + if handle == NULL: + handle = load_library() + __cudlaMemRegister = dlsym(handle, 'cudlaMemRegister') + + global __cudlaModuleLoadFromMemory + __cudlaModuleLoadFromMemory = dlsym(RTLD_DEFAULT, 'cudlaModuleLoadFromMemory') + if __cudlaModuleLoadFromMemory == NULL: + if handle == NULL: + handle = load_library() + __cudlaModuleLoadFromMemory = dlsym(handle, 'cudlaModuleLoadFromMemory') + + global __cudlaModuleGetAttributes + __cudlaModuleGetAttributes = dlsym(RTLD_DEFAULT, 'cudlaModuleGetAttributes') + if __cudlaModuleGetAttributes == NULL: + if handle == NULL: + handle = load_library() + __cudlaModuleGetAttributes = dlsym(handle, 'cudlaModuleGetAttributes') + + global __cudlaModuleUnload + __cudlaModuleUnload = dlsym(RTLD_DEFAULT, 'cudlaModuleUnload') + if __cudlaModuleUnload == NULL: + if handle == NULL: + handle = load_library() + __cudlaModuleUnload = dlsym(handle, 'cudlaModuleUnload') + + global __cudlaSubmitTask + __cudlaSubmitTask = dlsym(RTLD_DEFAULT, 'cudlaSubmitTask') + if __cudlaSubmitTask == NULL: + if handle == NULL: + handle = load_library() + __cudlaSubmitTask = dlsym(handle, 'cudlaSubmitTask') + + global __cudlaDeviceGetAttribute + __cudlaDeviceGetAttribute = dlsym(RTLD_DEFAULT, 'cudlaDeviceGetAttribute') + if __cudlaDeviceGetAttribute == NULL: + if handle == NULL: + handle = load_library() + __cudlaDeviceGetAttribute = dlsym(handle, 'cudlaDeviceGetAttribute') + + global __cudlaMemUnregister + __cudlaMemUnregister = dlsym(RTLD_DEFAULT, 'cudlaMemUnregister') + if __cudlaMemUnregister == NULL: + if handle == NULL: + handle = load_library() + __cudlaMemUnregister = dlsym(handle, 'cudlaMemUnregister') + + global __cudlaGetLastError + __cudlaGetLastError = dlsym(RTLD_DEFAULT, 'cudlaGetLastError') + if __cudlaGetLastError == NULL: + if handle == NULL: + handle = load_library() + __cudlaGetLastError = dlsym(handle, 'cudlaGetLastError') + + global __cudlaDestroyDevice + __cudlaDestroyDevice = 
dlsym(RTLD_DEFAULT, 'cudlaDestroyDevice') + if __cudlaDestroyDevice == NULL: + if handle == NULL: + handle = load_library() + __cudlaDestroyDevice = dlsym(handle, 'cudlaDestroyDevice') + + global __cudlaSetTaskTimeoutInMs + __cudlaSetTaskTimeoutInMs = dlsym(RTLD_DEFAULT, 'cudlaSetTaskTimeoutInMs') + if __cudlaSetTaskTimeoutInMs == NULL: + if handle == NULL: + handle = load_library() + __cudlaSetTaskTimeoutInMs = dlsym(handle, 'cudlaSetTaskTimeoutInMs') + + __py_cudla_init = True + return 0 + + +cdef inline int _check_or_init_cudla() except -1 nogil: + if __py_cudla_init: + return 0 + + return _init_cudla() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_cudla() + cdef dict data = {} + + global __cudlaGetVersion + data["__cudlaGetVersion"] = __cudlaGetVersion + + global __cudlaDeviceGetCount + data["__cudlaDeviceGetCount"] = __cudlaDeviceGetCount + + global __cudlaCreateDevice + data["__cudlaCreateDevice"] = __cudlaCreateDevice + + global __cudlaMemRegister + data["__cudlaMemRegister"] = __cudlaMemRegister + + global __cudlaModuleLoadFromMemory + data["__cudlaModuleLoadFromMemory"] = __cudlaModuleLoadFromMemory + + global __cudlaModuleGetAttributes + data["__cudlaModuleGetAttributes"] = __cudlaModuleGetAttributes + + global __cudlaModuleUnload + data["__cudlaModuleUnload"] = __cudlaModuleUnload + + global __cudlaSubmitTask + data["__cudlaSubmitTask"] = __cudlaSubmitTask + + global __cudlaDeviceGetAttribute + data["__cudlaDeviceGetAttribute"] = __cudlaDeviceGetAttribute + + global __cudlaMemUnregister + data["__cudlaMemUnregister"] = __cudlaMemUnregister + + global __cudlaGetLastError + data["__cudlaGetLastError"] = __cudlaGetLastError + + global __cudlaDestroyDevice + data["__cudlaDestroyDevice"] = __cudlaDestroyDevice + + global __cudlaSetTaskTimeoutInMs + data["__cudlaSetTaskTimeoutInMs"] = __cudlaSetTaskTimeoutInMs + + func_ptrs = data + return data 
+ + +cpdef _inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef cudlaStatus _cudlaGetVersion(uint64_t* const version) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaGetVersion + _check_or_init_cudla() + if __cudlaGetVersion == NULL: + with gil: + raise FunctionNotFoundError("function cudlaGetVersion is not found") + return (__cudlaGetVersion)( + version) + + +cdef cudlaStatus _cudlaDeviceGetCount(uint64_t* const pNumDevices) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDeviceGetCount + _check_or_init_cudla() + if __cudlaDeviceGetCount == NULL: + with gil: + raise FunctionNotFoundError("function cudlaDeviceGetCount is not found") + return (__cudlaDeviceGetCount)( + pNumDevices) + + +cdef cudlaStatus _cudlaCreateDevice(const uint64_t device, cudlaDevHandle* const devHandle, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaCreateDevice + _check_or_init_cudla() + if __cudlaCreateDevice == NULL: + with gil: + raise FunctionNotFoundError("function cudlaCreateDevice is not found") + return (__cudlaCreateDevice)( + device, devHandle, flags) + + +cdef cudlaStatus _cudlaMemRegister(const cudlaDevHandle devHandle, const uint64_t* const ptr, const size_t size, uint64_t** const devPtr, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaMemRegister + _check_or_init_cudla() + if __cudlaMemRegister == NULL: + with gil: + raise FunctionNotFoundError("function cudlaMemRegister is not found") + return (__cudlaMemRegister)( + devHandle, ptr, size, devPtr, flags) + + +cdef cudlaStatus _cudlaModuleLoadFromMemory(const cudlaDevHandle devHandle, const uint8_t* const pModule, const size_t 
moduleSize, cudlaModule* const hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleLoadFromMemory + _check_or_init_cudla() + if __cudlaModuleLoadFromMemory == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleLoadFromMemory is not found") + return (__cudlaModuleLoadFromMemory)( + devHandle, pModule, moduleSize, hModule, flags) + + +cdef cudlaStatus _cudlaModuleGetAttributes(const cudlaModule hModule, const cudlaModuleAttributeType attrType, cudlaModuleAttribute* const attribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleGetAttributes + _check_or_init_cudla() + if __cudlaModuleGetAttributes == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleGetAttributes is not found") + return (__cudlaModuleGetAttributes)( + hModule, attrType, attribute) + + +cdef cudlaStatus _cudlaModuleUnload(const cudlaModule hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleUnload + _check_or_init_cudla() + if __cudlaModuleUnload == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleUnload is not found") + return (__cudlaModuleUnload)( + hModule, flags) + + +cdef cudlaStatus _cudlaSubmitTask(const cudlaDevHandle devHandle, const cudlaTask* const ptrToTasks, const uint32_t numTasks, void* const stream, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaSubmitTask + _check_or_init_cudla() + if __cudlaSubmitTask == NULL: + with gil: + raise FunctionNotFoundError("function cudlaSubmitTask is not found") + return (__cudlaSubmitTask)( + devHandle, ptrToTasks, numTasks, stream, flags) + + +cdef cudlaStatus _cudlaDeviceGetAttribute(const cudlaDevHandle devHandle, const cudlaDevAttributeType attrib, cudlaDevAttribute* const pAttribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDeviceGetAttribute + _check_or_init_cudla() + if __cudlaDeviceGetAttribute == 
NULL: + with gil: + raise FunctionNotFoundError("function cudlaDeviceGetAttribute is not found") + return (__cudlaDeviceGetAttribute)( + devHandle, attrib, pAttribute) + + +cdef cudlaStatus _cudlaMemUnregister(const cudlaDevHandle devHandle, const uint64_t* const devPtr) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaMemUnregister + _check_or_init_cudla() + if __cudlaMemUnregister == NULL: + with gil: + raise FunctionNotFoundError("function cudlaMemUnregister is not found") + return (__cudlaMemUnregister)( + devHandle, devPtr) + + +cdef cudlaStatus _cudlaGetLastError(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaGetLastError + _check_or_init_cudla() + if __cudlaGetLastError == NULL: + with gil: + raise FunctionNotFoundError("function cudlaGetLastError is not found") + return (__cudlaGetLastError)( + devHandle) + + +cdef cudlaStatus _cudlaDestroyDevice(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDestroyDevice + _check_or_init_cudla() + if __cudlaDestroyDevice == NULL: + with gil: + raise FunctionNotFoundError("function cudlaDestroyDevice is not found") + return (__cudlaDestroyDevice)( + devHandle) + + +cdef cudlaStatus _cudlaSetTaskTimeoutInMs(const cudlaDevHandle devHandle, const uint32_t timeout) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaSetTaskTimeoutInMs + _check_or_init_cudla() + if __cudlaSetTaskTimeoutInMs == NULL: + with gil: + raise FunctionNotFoundError("function cudlaSetTaskTimeoutInMs is not found") + return (__cudlaSetTaskTimeoutInMs)( + devHandle, timeout) diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx new file mode 100644 index 00000000000..2cddfd6d19e --- /dev/null +++ b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx @@ -0,0 +1,345 @@ +# This code was automatically generated with version 1.5.0, generator version 
0.3.1.dev1465+gc5c5c8652. Do not modify it directly. + +from libc.stdint cimport intptr_t + +import threading +from .utils import FunctionNotFoundError, NotSupportedError + +from cuda.pathfinder import load_nvidia_dynamic_lib + +from libc.stddef cimport wchar_t +from libc.stdint cimport uintptr_t +from cpython cimport PyUnicode_AsWideCharString, PyMem_Free + +# You must 'from .utils import NotSupportedError' before using this template + +cdef extern from "windows.h" nogil: + ctypedef void* HMODULE + ctypedef void* HANDLE + ctypedef void* FARPROC + ctypedef unsigned long DWORD + ctypedef const wchar_t *LPCWSTR + ctypedef const char *LPCSTR + + cdef DWORD LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 + cdef DWORD LOAD_LIBRARY_SEARCH_DEFAULT_DIRS = 0x00001000 + cdef DWORD LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR = 0x00000100 + + HMODULE _LoadLibraryExW "LoadLibraryExW"( + LPCWSTR lpLibFileName, + HANDLE hFile, + DWORD dwFlags + ) + + FARPROC _GetProcAddress "GetProcAddress"(HMODULE hModule, LPCSTR lpProcName) + +cdef inline uintptr_t LoadLibraryExW(str path, HANDLE hFile, DWORD dwFlags): + cdef uintptr_t result + cdef wchar_t* wpath = PyUnicode_AsWideCharString(path, NULL) + with nogil: + result = _LoadLibraryExW( + wpath, + hFile, + dwFlags + ) + PyMem_Free(wpath) + return result + +cdef inline void *GetProcAddress(uintptr_t hModule, const char* lpProcName) nogil: + return _GetProcAddress(hModule, lpProcName) + +cdef int get_cuda_version(): + cdef int err, driver_ver = 0 + + # Load driver to check version + handle = LoadLibraryExW("nvcuda.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32) + if handle == 0: + raise NotSupportedError('CUDA driver is not found') + cuDriverGetVersion = GetProcAddress(handle, 'cuDriverGetVersion') + if cuDriverGetVersion == NULL: + raise RuntimeError('Did not find cuDriverGetVersion symbol in nvcuda.dll') + err = (cuDriverGetVersion)(&driver_ver) + if err != 0: + raise RuntimeError(f'cuDriverGetVersion returned error code {err}') + + return driver_ver + + + 
+############################################################################### +# Wrapper init +############################################################################### + +cdef object __symbol_lock = threading.Lock() +cdef bint __py_cudla_init = False + +cdef void* __cudlaGetVersion = NULL +cdef void* __cudlaDeviceGetCount = NULL +cdef void* __cudlaCreateDevice = NULL +cdef void* __cudlaMemRegister = NULL +cdef void* __cudlaModuleLoadFromMemory = NULL +cdef void* __cudlaModuleGetAttributes = NULL +cdef void* __cudlaModuleUnload = NULL +cdef void* __cudlaSubmitTask = NULL +cdef void* __cudlaDeviceGetAttribute = NULL +cdef void* __cudlaMemUnregister = NULL +cdef void* __cudlaGetLastError = NULL +cdef void* __cudlaDestroyDevice = NULL +cdef void* __cudlaSetTaskTimeoutInMs = NULL + + +cdef int _init_cudla() except -1 nogil: + global __py_cudla_init + + with gil, __symbol_lock: + # Recheck the flag after obtaining the locks + if __py_cudla_init: + return 0 + + # Load library + handle = load_nvidia_dynamic_lib("cudla")._handle_uint + + # Load function + global __cudlaGetVersion + __cudlaGetVersion = GetProcAddress(handle, 'cudlaGetVersion') + + global __cudlaDeviceGetCount + __cudlaDeviceGetCount = GetProcAddress(handle, 'cudlaDeviceGetCount') + + global __cudlaCreateDevice + __cudlaCreateDevice = GetProcAddress(handle, 'cudlaCreateDevice') + + global __cudlaMemRegister + __cudlaMemRegister = GetProcAddress(handle, 'cudlaMemRegister') + + global __cudlaModuleLoadFromMemory + __cudlaModuleLoadFromMemory = GetProcAddress(handle, 'cudlaModuleLoadFromMemory') + + global __cudlaModuleGetAttributes + __cudlaModuleGetAttributes = GetProcAddress(handle, 'cudlaModuleGetAttributes') + + global __cudlaModuleUnload + __cudlaModuleUnload = GetProcAddress(handle, 'cudlaModuleUnload') + + global __cudlaSubmitTask + __cudlaSubmitTask = GetProcAddress(handle, 'cudlaSubmitTask') + + global __cudlaDeviceGetAttribute + __cudlaDeviceGetAttribute = GetProcAddress(handle, 
'cudlaDeviceGetAttribute') + + global __cudlaMemUnregister + __cudlaMemUnregister = GetProcAddress(handle, 'cudlaMemUnregister') + + global __cudlaGetLastError + __cudlaGetLastError = GetProcAddress(handle, 'cudlaGetLastError') + + global __cudlaDestroyDevice + __cudlaDestroyDevice = GetProcAddress(handle, 'cudlaDestroyDevice') + + global __cudlaSetTaskTimeoutInMs + __cudlaSetTaskTimeoutInMs = GetProcAddress(handle, 'cudlaSetTaskTimeoutInMs') + + __py_cudla_init = True + return 0 + + +cdef inline int _check_or_init_cudla() except -1 nogil: + if __py_cudla_init: + return 0 + + return _init_cudla() + + +cdef dict func_ptrs = None + + +cpdef dict _inspect_function_pointers(): + global func_ptrs + if func_ptrs is not None: + return func_ptrs + + _check_or_init_cudla() + cdef dict data = {} + + global __cudlaGetVersion + data["__cudlaGetVersion"] = __cudlaGetVersion + + global __cudlaDeviceGetCount + data["__cudlaDeviceGetCount"] = __cudlaDeviceGetCount + + global __cudlaCreateDevice + data["__cudlaCreateDevice"] = __cudlaCreateDevice + + global __cudlaMemRegister + data["__cudlaMemRegister"] = __cudlaMemRegister + + global __cudlaModuleLoadFromMemory + data["__cudlaModuleLoadFromMemory"] = __cudlaModuleLoadFromMemory + + global __cudlaModuleGetAttributes + data["__cudlaModuleGetAttributes"] = __cudlaModuleGetAttributes + + global __cudlaModuleUnload + data["__cudlaModuleUnload"] = __cudlaModuleUnload + + global __cudlaSubmitTask + data["__cudlaSubmitTask"] = __cudlaSubmitTask + + global __cudlaDeviceGetAttribute + data["__cudlaDeviceGetAttribute"] = __cudlaDeviceGetAttribute + + global __cudlaMemUnregister + data["__cudlaMemUnregister"] = __cudlaMemUnregister + + global __cudlaGetLastError + data["__cudlaGetLastError"] = __cudlaGetLastError + + global __cudlaDestroyDevice + data["__cudlaDestroyDevice"] = __cudlaDestroyDevice + + global __cudlaSetTaskTimeoutInMs + data["__cudlaSetTaskTimeoutInMs"] = __cudlaSetTaskTimeoutInMs + + func_ptrs = data + return data + + +cpdef 
_inspect_function_pointer(str name): + global func_ptrs + if func_ptrs is None: + func_ptrs = _inspect_function_pointers() + return func_ptrs[name] + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef cudlaStatus _cudlaGetVersion(uint64_t* const version) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaGetVersion + _check_or_init_cudla() + if __cudlaGetVersion == NULL: + with gil: + raise FunctionNotFoundError("function cudlaGetVersion is not found") + return (__cudlaGetVersion)( + version) + + +cdef cudlaStatus _cudlaDeviceGetCount(uint64_t* const pNumDevices) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDeviceGetCount + _check_or_init_cudla() + if __cudlaDeviceGetCount == NULL: + with gil: + raise FunctionNotFoundError("function cudlaDeviceGetCount is not found") + return (__cudlaDeviceGetCount)( + pNumDevices) + + +cdef cudlaStatus _cudlaCreateDevice(const uint64_t device, cudlaDevHandle* const devHandle, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaCreateDevice + _check_or_init_cudla() + if __cudlaCreateDevice == NULL: + with gil: + raise FunctionNotFoundError("function cudlaCreateDevice is not found") + return (__cudlaCreateDevice)( + device, devHandle, flags) + + +cdef cudlaStatus _cudlaMemRegister(const cudlaDevHandle devHandle, const uint64_t* const ptr, const size_t size, uint64_t** const devPtr, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaMemRegister + _check_or_init_cudla() + if __cudlaMemRegister == NULL: + with gil: + raise FunctionNotFoundError("function cudlaMemRegister is not found") + return (__cudlaMemRegister)( + devHandle, ptr, size, devPtr, flags) + + +cdef cudlaStatus _cudlaModuleLoadFromMemory(const cudlaDevHandle devHandle, const uint8_t* const pModule, const size_t moduleSize, 
cudlaModule* const hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleLoadFromMemory + _check_or_init_cudla() + if __cudlaModuleLoadFromMemory == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleLoadFromMemory is not found") + return (__cudlaModuleLoadFromMemory)( + devHandle, pModule, moduleSize, hModule, flags) + + +cdef cudlaStatus _cudlaModuleGetAttributes(const cudlaModule hModule, const cudlaModuleAttributeType attrType, cudlaModuleAttribute* const attribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleGetAttributes + _check_or_init_cudla() + if __cudlaModuleGetAttributes == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleGetAttributes is not found") + return (__cudlaModuleGetAttributes)( + hModule, attrType, attribute) + + +cdef cudlaStatus _cudlaModuleUnload(const cudlaModule hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaModuleUnload + _check_or_init_cudla() + if __cudlaModuleUnload == NULL: + with gil: + raise FunctionNotFoundError("function cudlaModuleUnload is not found") + return (__cudlaModuleUnload)( + hModule, flags) + + +cdef cudlaStatus _cudlaSubmitTask(const cudlaDevHandle devHandle, const cudlaTask* const ptrToTasks, const uint32_t numTasks, void* const stream, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaSubmitTask + _check_or_init_cudla() + if __cudlaSubmitTask == NULL: + with gil: + raise FunctionNotFoundError("function cudlaSubmitTask is not found") + return (__cudlaSubmitTask)( + devHandle, ptrToTasks, numTasks, stream, flags) + + +cdef cudlaStatus _cudlaDeviceGetAttribute(const cudlaDevHandle devHandle, const cudlaDevAttributeType attrib, cudlaDevAttribute* const pAttribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDeviceGetAttribute + _check_or_init_cudla() + if __cudlaDeviceGetAttribute == NULL: + with 
gil: + raise FunctionNotFoundError("function cudlaDeviceGetAttribute is not found") + return (__cudlaDeviceGetAttribute)( + devHandle, attrib, pAttribute) + + +cdef cudlaStatus _cudlaMemUnregister(const cudlaDevHandle devHandle, const uint64_t* const devPtr) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaMemUnregister + _check_or_init_cudla() + if __cudlaMemUnregister == NULL: + with gil: + raise FunctionNotFoundError("function cudlaMemUnregister is not found") + return (__cudlaMemUnregister)( + devHandle, devPtr) + + +cdef cudlaStatus _cudlaGetLastError(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaGetLastError + _check_or_init_cudla() + if __cudlaGetLastError == NULL: + with gil: + raise FunctionNotFoundError("function cudlaGetLastError is not found") + return (__cudlaGetLastError)( + devHandle) + + +cdef cudlaStatus _cudlaDestroyDevice(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaDestroyDevice + _check_or_init_cudla() + if __cudlaDestroyDevice == NULL: + with gil: + raise FunctionNotFoundError("function cudlaDestroyDevice is not found") + return (__cudlaDestroyDevice)( + devHandle) + + +cdef cudlaStatus _cudlaSetTaskTimeoutInMs(const cudlaDevHandle devHandle, const uint32_t timeout) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + global __cudlaSetTaskTimeoutInMs + _check_or_init_cudla() + if __cudlaSetTaskTimeoutInMs == NULL: + with gil: + raise FunctionNotFoundError("function cudlaSetTaskTimeoutInMs is not found") + return (__cudlaSetTaskTimeoutInMs)( + devHandle, timeout) diff --git a/cuda_bindings/cuda/bindings/cudla.pxd b/cuda_bindings/cuda/bindings/cudla.pxd new file mode 100644 index 00000000000..09b55c579b6 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cudla.pxd @@ -0,0 +1,49 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. 
+ +from libc.stdint cimport intptr_t + +from .cycudla cimport * + + + + +############################################################################### +# Types +############################################################################### + +ctypedef cudlaDevHandle DevHandle +ctypedef cudlaModule Module + + +############################################################################### +# Enum +############################################################################### + +ctypedef cudlaStatus _Status +ctypedef cudlaMode _Mode +ctypedef cudlaModuleAttributeType _ModuleAttributeType +ctypedef cudlaFenceType _FenceType +ctypedef cudlaModuleLoadFlags _ModuleLoadFlags +ctypedef cudlaSubmissionFlags _SubmissionFlags +ctypedef cudlaAccessPermissionFlags _AccessPermissionFlags +ctypedef cudlaDevAttributeType _DevAttributeType + + +############################################################################### +# Functions +############################################################################### + +cpdef uint64_t get_version() except? -1 +cpdef uint64_t device_get_count() except? -1 +cpdef intptr_t create_device(uint64_t device, uint32_t flags) except * +cpdef intptr_t mem_register(intptr_t dev_handle, intptr_t ptr, size_t size, uint32_t flags) except * +cpdef intptr_t module_load_from_memory(intptr_t dev_handle, p_module, size_t module_size, uint32_t flags) except * +cpdef module_unload(intptr_t h_module, uint32_t flags) +cpdef submit_task(intptr_t dev_handle, intptr_t ptr_to_tasks, uint32_t num_tasks, intptr_t stream, uint32_t flags) +cpdef object device_get_attribute(intptr_t dev_handle, int attrib) except * +cpdef mem_unregister(intptr_t dev_handle, intptr_t dev_ptr) +cpdef int get_last_error(intptr_t dev_handle) except? 
0 +cpdef destroy_device(intptr_t dev_handle) +cpdef set_task_timeout_in_ms(intptr_t dev_handle, uint32_t timeout) + +cpdef module_get_attributes(intptr_t h_module, int attr_type) except * diff --git a/cuda_bindings/cuda/bindings/cudla.pyx b/cuda_bindings/cuda/bindings/cudla.pyx new file mode 100644 index 00000000000..c11b2494e1b --- /dev/null +++ b/cuda_bindings/cuda/bindings/cudla.pyx @@ -0,0 +1,1826 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. + +cimport cython # NOQA +from libc.stdint cimport intptr_t, uintptr_t + +from ._internal.utils cimport get_buffer_pointer + +from enum import IntEnum as _IntEnum + +from libc.stdlib cimport calloc, free, malloc +from cython cimport view +cimport cpython.buffer +cimport cpython.memoryview +cimport cpython +from libc.string cimport memcmp, memcpy +import numpy as _numpy + + +cdef __from_data(data, dtype_name, expected_dtype, lowpp_type): + # _numpy.recarray is a subclass of _numpy.ndarray, so implicitly handled here. 
+ if isinstance(data, lowpp_type): + return data + if not isinstance(data, _numpy.ndarray): + raise TypeError("data argument must be a NumPy ndarray") + if data.size != 1: + raise ValueError("data array must have a size of 1") + if data.dtype != expected_dtype: + raise ValueError(f"data array must be of dtype {dtype_name}") + return lowpp_type.from_ptr(data.ctypes.data, not data.flags.writeable, data) + + +cdef __from_buffer(buffer, size, lowpp_type): + cdef Py_buffer view + if cpython.PyObject_GetBuffer(buffer, &view, cpython.PyBUF_SIMPLE) != 0: + raise TypeError("buffer argument does not support the buffer protocol") + try: + if view.itemsize != 1: + raise ValueError("buffer itemsize must be 1 byte") + if view.len != size: + raise ValueError(f"buffer length must be {size} bytes") + return lowpp_type.from_ptr(view.buf, not view.readonly, buffer) + finally: + cpython.PyBuffer_Release(&view) + + +cdef __getbuffer(object self, cpython.Py_buffer *buffer, void *ptr, int size, bint readonly): + buffer.buf = ptr + buffer.format = 'b' + buffer.internal = NULL + buffer.itemsize = 1 + buffer.len = size + buffer.ndim = 1 + buffer.obj = self + buffer.readonly = readonly + buffer.shape = &buffer.len + buffer.strides = &buffer.itemsize + buffer.suboffsets = NULL + + + + +############################################################################### +# POD +############################################################################### + +cdef _get_external_memory_handle_desc_dtype_offsets(): + cdef cudlaExternalMemoryHandleDesc_t pod = cudlaExternalMemoryHandleDesc_t() + return _numpy.dtype({ + 'names': ['ext_buf_object', 'size_'], + 'formats': [_numpy.intp, _numpy.uint64], + 'offsets': [ + (&(pod.extBufObject)) - (&pod), + (&(pod.size)) - (&pod), + ], + 'itemsize': sizeof(cudlaExternalMemoryHandleDesc_t), + }) + +external_memory_handle_desc_dtype = _get_external_memory_handle_desc_dtype_offsets() + +cdef class ExternalMemoryHandleDesc: + """Empty-initialize an instance of 
`cudlaExternalMemoryHandleDesc_t`. + + + .. seealso:: `cudlaExternalMemoryHandleDesc_t` + """ + cdef: + cudlaExternalMemoryHandleDesc_t *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaExternalMemoryHandleDesc_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating ExternalMemoryHandleDesc") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef cudlaExternalMemoryHandleDesc_t *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.ExternalMemoryHandleDesc object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef ExternalMemoryHandleDesc other_ + if not isinstance(other, ExternalMemoryHandleDesc): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaExternalMemoryHandleDesc_t)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaExternalMemoryHandleDesc_t), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaExternalMemoryHandleDesc_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating ExternalMemoryHandleDesc") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaExternalMemoryHandleDesc_t)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def ext_buf_object(self): + """int: """ + return (self._ptr[0].extBufObject) + + @ext_buf_object.setter + def ext_buf_object(self, val): + if self._readonly: + raise 
ValueError("This ExternalMemoryHandleDesc instance is read-only") + self._ptr[0].extBufObject = val + + @property + def size_(self): + """int: """ + return self._ptr[0].size + + @size_.setter + def size_(self, val): + if self._readonly: + raise ValueError("This ExternalMemoryHandleDesc instance is read-only") + self._ptr[0].size = val + + @staticmethod + def from_buffer(buffer): + """Create an ExternalMemoryHandleDesc instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaExternalMemoryHandleDesc_t), ExternalMemoryHandleDesc) + + @staticmethod + def from_data(data): + """Create an ExternalMemoryHandleDesc instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `external_memory_handle_desc_dtype` holding the data. + """ + return __from_data(data, "external_memory_handle_desc_dtype", external_memory_handle_desc_dtype, ExternalMemoryHandleDesc) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an ExternalMemoryHandleDesc instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef ExternalMemoryHandleDesc obj = ExternalMemoryHandleDesc.__new__(ExternalMemoryHandleDesc) + if owner is None: + obj._ptr = malloc(sizeof(cudlaExternalMemoryHandleDesc_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating ExternalMemoryHandleDesc") + memcpy((obj._ptr), ptr, sizeof(cudlaExternalMemoryHandleDesc_t)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +cdef _get_external_semaphore_handle_desc_dtype_offsets(): + cdef cudlaExternalSemaphoreHandleDesc_t pod = cudlaExternalSemaphoreHandleDesc_t() + return _numpy.dtype({ + 'names': ['ext_sync_object'], + 'formats': [_numpy.intp], + 'offsets': [ + (&(pod.extSyncObject)) - (&pod), + ], + 'itemsize': sizeof(cudlaExternalSemaphoreHandleDesc_t), + }) + +external_semaphore_handle_desc_dtype = _get_external_semaphore_handle_desc_dtype_offsets() + +cdef class ExternalSemaphoreHandleDesc: + """Empty-initialize an instance of `cudlaExternalSemaphoreHandleDesc_t`. + + + .. 
seealso:: `cudlaExternalSemaphoreHandleDesc_t` + """ + cdef: + cudlaExternalSemaphoreHandleDesc_t *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaExternalSemaphoreHandleDesc_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating ExternalSemaphoreHandleDesc") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef cudlaExternalSemaphoreHandleDesc_t *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.ExternalSemaphoreHandleDesc object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef ExternalSemaphoreHandleDesc other_ + if not isinstance(other, ExternalSemaphoreHandleDesc): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaExternalSemaphoreHandleDesc_t)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaExternalSemaphoreHandleDesc_t), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaExternalSemaphoreHandleDesc_t)) + if self._ptr == NULL: + raise MemoryError("Error allocating ExternalSemaphoreHandleDesc") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaExternalSemaphoreHandleDesc_t)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def ext_sync_object(self): + """int: """ + return (self._ptr[0].extSyncObject) + + @ext_sync_object.setter + def ext_sync_object(self, val): + if self._readonly: + raise 
ValueError("This ExternalSemaphoreHandleDesc instance is read-only") + self._ptr[0].extSyncObject = val + + @staticmethod + def from_buffer(buffer): + """Create an ExternalSemaphoreHandleDesc instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaExternalSemaphoreHandleDesc_t), ExternalSemaphoreHandleDesc) + + @staticmethod + def from_data(data): + """Create an ExternalSemaphoreHandleDesc instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `external_semaphore_handle_desc_dtype` holding the data. + """ + return __from_data(data, "external_semaphore_handle_desc_dtype", external_semaphore_handle_desc_dtype, ExternalSemaphoreHandleDesc) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an ExternalSemaphoreHandleDesc instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef ExternalSemaphoreHandleDesc obj = ExternalSemaphoreHandleDesc.__new__(ExternalSemaphoreHandleDesc) + if owner is None: + obj._ptr = malloc(sizeof(cudlaExternalSemaphoreHandleDesc_t)) + if obj._ptr == NULL: + raise MemoryError("Error allocating ExternalSemaphoreHandleDesc") + memcpy((obj._ptr), ptr, sizeof(cudlaExternalSemaphoreHandleDesc_t)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +cdef _get_module_tensor_descriptor_dtype_offsets(): + cdef cudlaModuleTensorDescriptor pod = cudlaModuleTensorDescriptor() + return _numpy.dtype({ + 'names': ['name', 'size_', 'n', 'c', 'h', 'w', 'data_format', 'data_type', 'data_category', 'pixel_format', 'pixel_mapping', 'stride'], + 'formats': [(_numpy.int8, 81), _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, (_numpy.uint32, 8)], + 'offsets': [ + (&(pod.name)) - (&pod), + (&(pod.size)) - (&pod), + (&(pod.n)) - (&pod), + (&(pod.c)) - (&pod), + (&(pod.h)) - (&pod), + (&(pod.w)) - (&pod), + (&(pod.dataFormat)) - (&pod), + (&(pod.dataType)) - (&pod), + (&(pod.dataCategory)) - (&pod), + (&(pod.pixelFormat)) - (&pod), + (&(pod.pixelMapping)) - (&pod), + (&(pod.stride)) - (&pod), + ], + 'itemsize': sizeof(cudlaModuleTensorDescriptor), + }) + +module_tensor_descriptor_dtype = _get_module_tensor_descriptor_dtype_offsets() + +cdef class ModuleTensorDescriptor: + """Empty-initialize an instance of `cudlaModuleTensorDescriptor`. + + + .. 
seealso:: `cudlaModuleTensorDescriptor` + """ + cdef: + cudlaModuleTensorDescriptor *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaModuleTensorDescriptor)) + if self._ptr == NULL: + raise MemoryError("Error allocating ModuleTensorDescriptor") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef cudlaModuleTensorDescriptor *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.ModuleTensorDescriptor object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef ModuleTensorDescriptor other_ + if not isinstance(other, ModuleTensorDescriptor): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaModuleTensorDescriptor)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaModuleTensorDescriptor), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaModuleTensorDescriptor)) + if self._ptr == NULL: + raise MemoryError("Error allocating ModuleTensorDescriptor") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaModuleTensorDescriptor)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def name(self): + """~_numpy.int8: (array of length 81).""" + return cpython.PyUnicode_FromString(self._ptr[0].name) + + @name.setter + def name(self, val): + if self._readonly: + raise ValueError("This ModuleTensorDescriptor instance is read-only") + cdef 
bytes buf = val.encode()
+        if len(buf) >= 81:
+            raise ValueError("String too long for field name, max length is 80")
+        cdef char *ptr = buf
+        # Copy only the encoded bytes plus the NUL terminator. Copying a fixed
+        # 81 bytes would read past the end of `buf` (undefined behavior) for
+        # any string shorter than 80 characters; the length check above
+        # guarantees len(buf) + 1 <= 81, so the destination cannot overflow.
+        memcpy(<void*>(self._ptr[0].name), ptr, len(buf) + 1)
+
+    @property
+    def size_(self):
+        """int: """
+        return self._ptr[0].size
+
+    @size_.setter
+    def size_(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].size = val
+
+    @property
+    def n(self):
+        """int: """
+        return self._ptr[0].n
+
+    @n.setter
+    def n(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].n = val
+
+    @property
+    def c(self):
+        """int: """
+        return self._ptr[0].c
+
+    @c.setter
+    def c(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].c = val
+
+    @property
+    def h(self):
+        """int: """
+        return self._ptr[0].h
+
+    @h.setter
+    def h(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].h = val
+
+    @property
+    def w(self):
+        """int: """
+        return self._ptr[0].w
+
+    @w.setter
+    def w(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].w = val
+
+    @property
+    def data_format(self):
+        """int: """
+        return self._ptr[0].dataFormat
+
+    @data_format.setter
+    def data_format(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].dataFormat = val
+
+    @property
+    def data_type(self):
+        """int: """
+        return self._ptr[0].dataType
+
+    @data_type.setter
+    def data_type(self, val):
+        if self._readonly:
+            raise ValueError("This ModuleTensorDescriptor instance is read-only")
+        self._ptr[0].dataType = val
+
+    @property
+    def data_category(self):
+        """int: """
+        return self._ptr[0].dataCategory
+
+    @data_category.setter
+    def data_category(self, val):
+        if self._readonly:
+            raise ValueError("This 
ModuleTensorDescriptor instance is read-only") + self._ptr[0].dataCategory = val + + @property + def pixel_format(self): + """int: """ + return self._ptr[0].pixelFormat + + @pixel_format.setter + def pixel_format(self, val): + if self._readonly: + raise ValueError("This ModuleTensorDescriptor instance is read-only") + self._ptr[0].pixelFormat = val + + @property + def pixel_mapping(self): + """int: """ + return self._ptr[0].pixelMapping + + @pixel_mapping.setter + def pixel_mapping(self, val): + if self._readonly: + raise ValueError("This ModuleTensorDescriptor instance is read-only") + self._ptr[0].pixelMapping = val + + @property + def stride(self): + """~_numpy.uint32: (array of length 8).""" + cdef view.array arr = view.array(shape=(8,), itemsize=sizeof(uint32_t), format="I", mode="c", allocate_buffer=False) + arr.data = (&(self._ptr[0].stride)) + return _numpy.asarray(arr) + + @stride.setter + def stride(self, val): + if self._readonly: + raise ValueError("This ModuleTensorDescriptor instance is read-only") + if len(val) != 8: + raise ValueError(f"Expected length { 8 } for field stride, got {len(val)}") + cdef view.array arr = view.array(shape=(8,), itemsize=sizeof(uint32_t), format="I", mode="c") + arr[:] = _numpy.asarray(val, dtype=_numpy.uint32) + memcpy((&(self._ptr[0].stride)), (arr.data), sizeof(uint32_t) * len(val)) + + @staticmethod + def from_buffer(buffer): + """Create an ModuleTensorDescriptor instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaModuleTensorDescriptor), ModuleTensorDescriptor) + + @staticmethod + def from_data(data): + """Create an ModuleTensorDescriptor instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `module_tensor_descriptor_dtype` holding the data. 
+ """ + return __from_data(data, "module_tensor_descriptor_dtype", module_tensor_descriptor_dtype, ModuleTensorDescriptor) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an ModuleTensorDescriptor instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef ModuleTensorDescriptor obj = ModuleTensorDescriptor.__new__(ModuleTensorDescriptor) + if owner is None: + obj._ptr = malloc(sizeof(cudlaModuleTensorDescriptor)) + if obj._ptr == NULL: + raise MemoryError("Error allocating ModuleTensorDescriptor") + memcpy((obj._ptr), ptr, sizeof(cudlaModuleTensorDescriptor)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +cdef _get_fence_dtype_offsets(): + cdef CudlaFence pod = CudlaFence() + return _numpy.dtype({ + 'names': ['fence', 'type'], + 'formats': [_numpy.intp, _numpy.int32], + 'offsets': [ + (&(pod.fence)) - (&pod), + (&(pod.type)) - (&pod), + ], + 'itemsize': sizeof(CudlaFence), + }) + +fence_dtype = _get_fence_dtype_offsets() + +cdef class Fence: + """Empty-initialize an instance of `CudlaFence`. + + + .. 
seealso:: `CudlaFence` + """ + cdef: + CudlaFence *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(CudlaFence)) + if self._ptr == NULL: + raise MemoryError("Error allocating Fence") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef CudlaFence *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.Fence object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef Fence other_ + if not isinstance(other, Fence): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(CudlaFence)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(CudlaFence), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(CudlaFence)) + if self._ptr == NULL: + raise MemoryError("Error allocating Fence") + memcpy(self._ptr, val.ctypes.data, sizeof(CudlaFence)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def fence(self): + """int: """ + return (self._ptr[0].fence) + + @fence.setter + def fence(self, val): + if self._readonly: + raise ValueError("This Fence instance is read-only") + self._ptr[0].fence = val + + @property + def type(self): + """int: """ + return (self._ptr[0].type) + + @type.setter + def type(self, val): + if self._readonly: + raise ValueError("This Fence instance is read-only") + self._ptr[0].type = val + + @staticmethod + def from_buffer(buffer): + """Create 
an Fence instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(CudlaFence), Fence) + + @staticmethod + def from_data(data): + """Create an Fence instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `fence_dtype` holding the data. + """ + return __from_data(data, "fence_dtype", fence_dtype, Fence) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an Fence instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef Fence obj = Fence.__new__(Fence) + if owner is None: + obj._ptr = malloc(sizeof(CudlaFence)) + if obj._ptr == NULL: + raise MemoryError("Error allocating Fence") + memcpy((obj._ptr), ptr, sizeof(CudlaFence)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +dev_attribute_dtype = _numpy.dtype(( + _numpy.dtype((_numpy.void, sizeof(cudlaDevAttribute))), + { + "unified_addressing_supported": (_numpy.uint8, 0), + "device_version": (_numpy.uint32, 0), + } + )) + + +cdef class DevAttribute: + """Empty-initialize an instance of `cudlaDevAttribute`. + + + .. 
seealso:: `cudlaDevAttribute` + """ + cdef: + cudlaDevAttribute *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaDevAttribute)) + if self._ptr == NULL: + raise MemoryError("Error allocating DevAttribute") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef cudlaDevAttribute *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.DevAttribute object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef DevAttribute other_ + if not isinstance(other, DevAttribute): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaDevAttribute)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaDevAttribute), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaDevAttribute)) + if self._ptr == NULL: + raise MemoryError("Error allocating DevAttribute") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaDevAttribute)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def unified_addressing_supported(self): + """int: """ + return self._ptr[0].unifiedAddressingSupported + + @unified_addressing_supported.setter + def unified_addressing_supported(self, val): + if self._readonly: + raise ValueError("This DevAttribute instance is read-only") + self._ptr[0].unifiedAddressingSupported = val + + @property + def device_version(self): + """int: """ + return 
self._ptr[0].deviceVersion + + @device_version.setter + def device_version(self, val): + if self._readonly: + raise ValueError("This DevAttribute instance is read-only") + self._ptr[0].deviceVersion = val + + @staticmethod + def from_buffer(buffer): + """Create an DevAttribute instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaDevAttribute), DevAttribute) + + @staticmethod + def from_data(data): + """Create an DevAttribute instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `dev_attribute_dtype` holding the data. + """ + return __from_data(data, "dev_attribute_dtype", dev_attribute_dtype, DevAttribute) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an DevAttribute instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef DevAttribute obj = DevAttribute.__new__(DevAttribute) + if owner is None: + obj._ptr = malloc(sizeof(cudlaDevAttribute)) + if obj._ptr == NULL: + raise MemoryError("Error allocating DevAttribute") + memcpy((obj._ptr), ptr, sizeof(cudlaDevAttribute)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +module_attribute_dtype = _numpy.dtype(( + _numpy.dtype((_numpy.void, sizeof(cudlaModuleAttribute))), + { + "num_input_tensors": (_numpy.uint32, 0), + "num_output_tensors": (_numpy.uint32, 0), + "input_tensor_desc": (_numpy.intp, 0), + "output_tensor_desc": (_numpy.intp, 0), + } + )) + + +cdef class ModuleAttribute: + """Empty-initialize an instance of `cudlaModuleAttribute`. + + + .. 
seealso:: `cudlaModuleAttribute` + """ + cdef: + cudlaModuleAttribute *_ptr + object _owner + bint _owned + bint _readonly + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaModuleAttribute)) + if self._ptr == NULL: + raise MemoryError("Error allocating ModuleAttribute") + self._owner = None + self._owned = True + self._readonly = False + + def __dealloc__(self): + cdef cudlaModuleAttribute *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.ModuleAttribute object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef ModuleAttribute other_ + if not isinstance(other, ModuleAttribute): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaModuleAttribute)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaModuleAttribute), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaModuleAttribute)) + if self._ptr == NULL: + raise MemoryError("Error allocating ModuleAttribute") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaModuleAttribute)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def num_input_tensors(self): + """int: """ + return self._ptr[0].numInputTensors + + @num_input_tensors.setter + def num_input_tensors(self, val): + if self._readonly: + raise ValueError("This ModuleAttribute instance is read-only") + self._ptr[0].numInputTensors = val + + @property + def num_output_tensors(self): + """int: """ + return 
self._ptr[0].numOutputTensors + + @num_output_tensors.setter + def num_output_tensors(self, val): + if self._readonly: + raise ValueError("This ModuleAttribute instance is read-only") + self._ptr[0].numOutputTensors = val + + @property + def input_tensor_desc(self): + """int: """ + return (self._ptr[0].inputTensorDesc) + + @input_tensor_desc.setter + def input_tensor_desc(self, val): + if self._readonly: + raise ValueError("This ModuleAttribute instance is read-only") + self._ptr[0].inputTensorDesc = val + + @property + def output_tensor_desc(self): + """int: """ + return (self._ptr[0].outputTensorDesc) + + @output_tensor_desc.setter + def output_tensor_desc(self, val): + if self._readonly: + raise ValueError("This ModuleAttribute instance is read-only") + self._ptr[0].outputTensorDesc = val + + @staticmethod + def from_buffer(buffer): + """Create an ModuleAttribute instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaModuleAttribute), ModuleAttribute) + + @staticmethod + def from_data(data): + """Create an ModuleAttribute instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `module_attribute_dtype` holding the data. + """ + return __from_data(data, "module_attribute_dtype", module_attribute_dtype, ModuleAttribute) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an ModuleAttribute instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef ModuleAttribute obj = ModuleAttribute.__new__(ModuleAttribute) + if owner is None: + obj._ptr = malloc(sizeof(cudlaModuleAttribute)) + if obj._ptr == NULL: + raise MemoryError("Error allocating ModuleAttribute") + memcpy((obj._ptr), ptr, sizeof(cudlaModuleAttribute)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + return obj + + +cdef _get_wait_events_dtype_offsets(): + cdef cudlaWaitEvents pod = cudlaWaitEvents() + return _numpy.dtype({ + 'names': ['pre_fences', 'num_events'], + 'formats': [_numpy.intp, _numpy.uint32], + 'offsets': [ + (&(pod.preFences)) - (&pod), + (&(pod.numEvents)) - (&pod), + ], + 'itemsize': sizeof(cudlaWaitEvents), + }) + +wait_events_dtype = _get_wait_events_dtype_offsets() + +cdef class WaitEvents: + """Empty-initialize an instance of `cudlaWaitEvents`. + + + .. seealso:: `cudlaWaitEvents` + """ + cdef: + cudlaWaitEvents *_ptr + object _owner + bint _owned + bint _readonly + dict _refs + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaWaitEvents)) + if self._ptr == NULL: + raise MemoryError("Error allocating WaitEvents") + self._owner = None + self._owned = True + self._readonly = False + self._refs = {} + + def __dealloc__(self): + cdef cudlaWaitEvents *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.WaitEvents object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef WaitEvents other_ + if not isinstance(other, WaitEvents): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaWaitEvents)) == 0) + + def __getbuffer__(self, 
Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaWaitEvents), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaWaitEvents)) + if self._ptr == NULL: + raise MemoryError("Error allocating WaitEvents") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaWaitEvents)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def pre_fences(self): + """int: """ + if self._ptr[0].preFences == NULL or self._ptr[0].numEvents == 0: + return [] + return Fence.from_ptr((self._ptr[0].preFences), self._ptr[0].numEvents) + + @pre_fences.setter + def pre_fences(self, val): + if self._readonly: + raise ValueError("This WaitEvents instance is read-only") + cdef Fence arr = val + self._ptr[0].preFences = (arr._get_ptr()) + self._ptr[0].numEvents = len(arr) + self._refs["pre_fences"] = arr + + @staticmethod + def from_buffer(buffer): + """Create an WaitEvents instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaWaitEvents), WaitEvents) + + @staticmethod + def from_data(data): + """Create an WaitEvents instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `wait_events_dtype` holding the data. + """ + return __from_data(data, "wait_events_dtype", wait_events_dtype, WaitEvents) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an WaitEvents instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef WaitEvents obj = WaitEvents.__new__(WaitEvents) + if owner is None: + obj._ptr = malloc(sizeof(cudlaWaitEvents)) + if obj._ptr == NULL: + raise MemoryError("Error allocating WaitEvents") + memcpy((obj._ptr), ptr, sizeof(cudlaWaitEvents)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + obj._refs = {} + return obj + + +cdef _get_signal_events_dtype_offsets(): + cdef cudlaSignalEvents pod = cudlaSignalEvents() + return _numpy.dtype({ + 'names': ['dev_ptrs', 'eof_fences', 'num_events'], + 'formats': [_numpy.intp, _numpy.intp, _numpy.uint32], + 'offsets': [ + (&(pod.devPtrs)) - (&pod), + (&(pod.eofFences)) - (&pod), + (&(pod.numEvents)) - (&pod), + ], + 'itemsize': sizeof(cudlaSignalEvents), + }) + +signal_events_dtype = _get_signal_events_dtype_offsets() + +cdef class SignalEvents: + """Empty-initialize an instance of `cudlaSignalEvents`. + + + .. 
seealso:: `cudlaSignalEvents` + """ + cdef: + cudlaSignalEvents *_ptr + object _owner + bint _owned + bint _readonly + dict _refs + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaSignalEvents)) + if self._ptr == NULL: + raise MemoryError("Error allocating SignalEvents") + self._owner = None + self._owned = True + self._readonly = False + self._refs = {} + + def __dealloc__(self): + cdef cudlaSignalEvents *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.SignalEvents object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef SignalEvents other_ + if not isinstance(other, SignalEvents): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaSignalEvents)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaSignalEvents), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaSignalEvents)) + if self._ptr == NULL: + raise MemoryError("Error allocating SignalEvents") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaSignalEvents)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def dev_ptrs(self): + """int: """ + if self._ptr[0].devPtrs == NULL or self._ptr[0].numEvents == 0: + return view.array(shape=(1,), itemsize=sizeof(intptr_t), format="q", mode="c")[:0] + cdef view.array arr = view.array(shape=(self._ptr[0].numEvents,), itemsize=sizeof(intptr_t), format="q", mode="c", allocate_buffer=False) + arr.data = 
(self._ptr[0].devPtrs) + return arr + + @dev_ptrs.setter + def dev_ptrs(self, val): + if self._readonly: + raise ValueError("This SignalEvents instance is read-only") + cdef Py_ssize_t _n = len(val) + self._ptr[0].numEvents = _n + if _n == 0: + return + cdef view.array arr = view.array(shape=(_n,), itemsize=sizeof(intptr_t), format="q", mode="c") + cdef intptr_t[:] mv = arr + cdef Py_ssize_t i + for i in range(_n): + mv[i] = val[i] + self._ptr[0].devPtrs = (arr.data) + self._refs["dev_ptrs"] = arr + + @property + def eof_fences(self): + """int: """ + if self._ptr[0].eofFences == NULL or self._ptr[0].numEvents == 0: + return [] + return Fence.from_ptr((self._ptr[0].eofFences), self._ptr[0].numEvents) + + @eof_fences.setter + def eof_fences(self, val): + if self._readonly: + raise ValueError("This SignalEvents instance is read-only") + cdef Fence arr = val + self._ptr[0].eofFences = (arr._get_ptr()) + self._ptr[0].numEvents = len(arr) + self._refs["eof_fences"] = arr + + @staticmethod + def from_buffer(buffer): + """Create an SignalEvents instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaSignalEvents), SignalEvents) + + @staticmethod + def from_data(data): + """Create an SignalEvents instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `signal_events_dtype` holding the data. + """ + return __from_data(data, "signal_events_dtype", signal_events_dtype, SignalEvents) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an SignalEvents instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. 
+ """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef SignalEvents obj = SignalEvents.__new__(SignalEvents) + if owner is None: + obj._ptr = malloc(sizeof(cudlaSignalEvents)) + if obj._ptr == NULL: + raise MemoryError("Error allocating SignalEvents") + memcpy((obj._ptr), ptr, sizeof(cudlaSignalEvents)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + obj._refs = {} + return obj + + +cdef _get_task_dtype_offsets(): + cdef cudlaTask pod = cudlaTask() + return _numpy.dtype({ + 'names': ['module_handle', 'output_tensor', 'num_output_tensors', 'num_input_tensors', 'input_tensor', 'wait_events', 'signal_events'], + 'formats': [_numpy.intp, _numpy.intp, _numpy.uint32, _numpy.uint32, _numpy.intp, _numpy.intp, _numpy.intp], + 'offsets': [ + (&(pod.moduleHandle)) - (&pod), + (&(pod.outputTensor)) - (&pod), + (&(pod.numOutputTensors)) - (&pod), + (&(pod.numInputTensors)) - (&pod), + (&(pod.inputTensor)) - (&pod), + (&(pod.waitEvents)) - (&pod), + (&(pod.signalEvents)) - (&pod), + ], + 'itemsize': sizeof(cudlaTask), + }) + +task_dtype = _get_task_dtype_offsets() + +cdef class Task: + """Empty-initialize an instance of `cudlaTask`. + + + .. 
seealso:: `cudlaTask` + """ + cdef: + cudlaTask *_ptr + object _owner + bint _owned + bint _readonly + dict _refs + + def __init__(self): + self._ptr = calloc(1, sizeof(cudlaTask)) + if self._ptr == NULL: + raise MemoryError("Error allocating Task") + self._owner = None + self._owned = True + self._readonly = False + self._refs = {} + + def __dealloc__(self): + cdef cudlaTask *ptr + if self._owned and self._ptr != NULL: + ptr = self._ptr + self._ptr = NULL + free(ptr) + + def __repr__(self): + return f"<{__name__}.Task object at {hex(id(self))}>" + + @property + def ptr(self): + """Get the pointer address to the data as Python :class:`int`.""" + return (self._ptr) + + cdef intptr_t _get_ptr(self): + return (self._ptr) + + def __int__(self): + return (self._ptr) + + def __eq__(self, other): + cdef Task other_ + if not isinstance(other, Task): + return False + other_ = other + return (memcmp((self._ptr), (other_._ptr), sizeof(cudlaTask)) == 0) + + def __getbuffer__(self, Py_buffer *buffer, int flags): + __getbuffer(self, buffer, self._ptr, sizeof(cudlaTask), self._readonly) + + def __releasebuffer__(self, Py_buffer *buffer): + pass + + def __setitem__(self, key, val): + if key == 0 and isinstance(val, _numpy.ndarray): + self._ptr = malloc(sizeof(cudlaTask)) + if self._ptr == NULL: + raise MemoryError("Error allocating Task") + memcpy(self._ptr, val.ctypes.data, sizeof(cudlaTask)) + self._owner = None + self._owned = True + self._readonly = not val.flags.writeable + else: + setattr(self, key, val) + + @property + def module_handle(self): + """int: """ + return (self._ptr[0].moduleHandle) + + @module_handle.setter + def module_handle(self, val): + if self._readonly: + raise ValueError("This Task instance is read-only") + self._ptr[0].moduleHandle = val + + @property + def output_tensor(self): + """int: """ + if self._ptr[0].outputTensor == NULL or self._ptr[0].numOutputTensors == 0: + return view.array(shape=(1,), itemsize=sizeof(intptr_t), format="q", mode="c")[:0] + 
cdef view.array arr = view.array(shape=(self._ptr[0].numOutputTensors,), itemsize=sizeof(intptr_t), format="q", mode="c", allocate_buffer=False) + arr.data = (self._ptr[0].outputTensor) + return arr + + @output_tensor.setter + def output_tensor(self, val): + if self._readonly: + raise ValueError("This Task instance is read-only") + cdef Py_ssize_t _n = len(val) + self._ptr[0].numOutputTensors = _n + if _n == 0: + return + cdef view.array arr = view.array(shape=(_n,), itemsize=sizeof(intptr_t), format="q", mode="c") + cdef intptr_t[:] mv = arr + cdef Py_ssize_t i + for i in range(_n): + mv[i] = val[i] + self._ptr[0].outputTensor = (arr.data) + self._refs["output_tensor"] = arr + + @property + def input_tensor(self): + """int: """ + if self._ptr[0].inputTensor == NULL or self._ptr[0].numInputTensors == 0: + return view.array(shape=(1,), itemsize=sizeof(intptr_t), format="q", mode="c")[:0] + cdef view.array arr = view.array(shape=(self._ptr[0].numInputTensors,), itemsize=sizeof(intptr_t), format="q", mode="c", allocate_buffer=False) + arr.data = (self._ptr[0].inputTensor) + return arr + + @input_tensor.setter + def input_tensor(self, val): + if self._readonly: + raise ValueError("This Task instance is read-only") + cdef Py_ssize_t _n = len(val) + self._ptr[0].numInputTensors = _n + if _n == 0: + return + cdef view.array arr = view.array(shape=(_n,), itemsize=sizeof(intptr_t), format="q", mode="c") + cdef intptr_t[:] mv = arr + cdef Py_ssize_t i + for i in range(_n): + mv[i] = val[i] + self._ptr[0].inputTensor = (arr.data) + self._refs["input_tensor"] = arr + + @property + def wait_events(self): + """int: """ + return (self._ptr[0].waitEvents) + + @wait_events.setter + def wait_events(self, val): + if self._readonly: + raise ValueError("This Task instance is read-only") + self._ptr[0].waitEvents = val + + @property + def signal_events(self): + """int: """ + return (self._ptr[0].signalEvents) + + @signal_events.setter + def signal_events(self, val): + if self._readonly: 
+ raise ValueError("This Task instance is read-only") + self._ptr[0].signalEvents = val + + @staticmethod + def from_buffer(buffer): + """Create an Task instance with the memory from the given buffer.""" + return __from_buffer(buffer, sizeof(cudlaTask), Task) + + @staticmethod + def from_data(data): + """Create an Task instance wrapping the given NumPy array. + + Args: + data (_numpy.ndarray): a single-element array of dtype `task_dtype` holding the data. + """ + return __from_data(data, "task_dtype", task_dtype, Task) + + @staticmethod + def from_ptr(intptr_t ptr, bint readonly=False, object owner=None): + """Create an Task instance wrapping the given pointer. + + Args: + ptr (intptr_t): pointer address as Python :class:`int` to the data. + owner (object): The Python object that owns the pointer. If not provided, data will be copied. + readonly (bool): whether the data is read-only (to the user). default is `False`. + """ + if ptr == 0: + raise ValueError("ptr must not be null (0)") + cdef Task obj = Task.__new__(Task) + if owner is None: + obj._ptr = malloc(sizeof(cudlaTask)) + if obj._ptr == NULL: + raise MemoryError("Error allocating Task") + memcpy((obj._ptr), ptr, sizeof(cudlaTask)) + obj._owner = None + obj._owned = True + else: + obj._ptr = ptr + obj._owner = owner + obj._owned = False + obj._readonly = readonly + obj._refs = {} + return obj + + + +############################################################################### +# Enum +############################################################################### + +class Status(_IntEnum): + """ + See `cudlaStatus`. 
+ """ + Success = cudlaSuccess + ErrorInvalidParam = cudlaErrorInvalidParam + ErrorOutOfResources = cudlaErrorOutOfResources + ErrorCreationFailed = cudlaErrorCreationFailed + ErrorInvalidAddress = cudlaErrorInvalidAddress + ErrorOs = cudlaErrorOs + ErrorCuda = cudlaErrorCuda + ErrorUmd = cudlaErrorUmd + ErrorInvalidDevice = cudlaErrorInvalidDevice + ErrorInvalidAttribute = cudlaErrorInvalidAttribute + ErrorIncompatibleDlaSWVersion = cudlaErrorIncompatibleDlaSWVersion + ErrorMemoryRegistered = cudlaErrorMemoryRegistered + ErrorInvalidModule = cudlaErrorInvalidModule + ErrorUnsupportedOperation = cudlaErrorUnsupportedOperation + ErrorNvSci = cudlaErrorNvSci + ErrorDlaErrInvalidInput = cudlaErrorDlaErrInvalidInput + ErrorDlaErrInvalidPreAction = cudlaErrorDlaErrInvalidPreAction + ErrorDlaErrNoMem = cudlaErrorDlaErrNoMem + ErrorDlaErrProcessorBusy = cudlaErrorDlaErrProcessorBusy + ErrorDlaErrTaskStatusMismatch = cudlaErrorDlaErrTaskStatusMismatch + ErrorDlaErrEngineTimeout = cudlaErrorDlaErrEngineTimeout + ErrorDlaErrDataMismatch = cudlaErrorDlaErrDataMismatch + ErrorUnknown = cudlaErrorUnknown + +class Mode(_IntEnum): + """ + See `cudlaMode`. + """ + CUDA_DLA = CUDLA_CUDA_DLA + STANDALONE = CUDLA_STANDALONE + +class ModuleAttributeType(_IntEnum): + """ + See `cudlaModuleAttributeType`. + """ + NUM_INPUT_TENSORS = CUDLA_NUM_INPUT_TENSORS + NUM_OUTPUT_TENSORS = CUDLA_NUM_OUTPUT_TENSORS + INPUT_TENSOR_DESCRIPTORS = CUDLA_INPUT_TENSOR_DESCRIPTORS + OUTPUT_TENSOR_DESCRIPTORS = CUDLA_OUTPUT_TENSOR_DESCRIPTORS + NUM_OUTPUT_TASK_STATISTICS = CUDLA_NUM_OUTPUT_TASK_STATISTICS + OUTPUT_TASK_STATISTICS_DESCRIPTORS = CUDLA_OUTPUT_TASK_STATISTICS_DESCRIPTORS + +class FenceType(_IntEnum): + """ + See `cudlaFenceType`. + """ + NVSCISYNC_FENCE = CUDLA_NVSCISYNC_FENCE + NVSCISYNC_FENCE_SOF = CUDLA_NVSCISYNC_FENCE_SOF + +class ModuleLoadFlags(_IntEnum): + """ + See `cudlaModuleLoadFlags`. 
+ """ + MODULE_DEFAULT = CUDLA_MODULE_DEFAULT + MODULE_ENABLE_FAULT_DIAGNOSTICS = CUDLA_MODULE_ENABLE_FAULT_DIAGNOSTICS + +class SubmissionFlags(_IntEnum): + """ + See `cudlaSubmissionFlags`. + """ + SUBMIT_NOOP = CUDLA_SUBMIT_NOOP + SUBMIT_SKIP_LOCK_ACQUIRE = CUDLA_SUBMIT_SKIP_LOCK_ACQUIRE + SUBMIT_DIAGNOSTICS_TASK = CUDLA_SUBMIT_DIAGNOSTICS_TASK + +class AccessPermissionFlags(_IntEnum): + """ + See `cudlaAccessPermissionFlags`. + """ + READ_WRITE_PERM = CUDLA_READ_WRITE_PERM + READ_ONLY_PERM = CUDLA_READ_ONLY_PERM + TASK_STATISTICS = CUDLA_TASK_STATISTICS + +class DevAttributeType(_IntEnum): + """ + See `cudlaDevAttributeType`. + """ + UNIFIED_ADDRESSING = CUDLA_UNIFIED_ADDRESSING + DEVICE_VERSION = CUDLA_DEVICE_VERSION + + +############################################################################### +# Error handling +############################################################################### + +class CudlaError(Exception): + + def __init__(self, status): + self.status = status + s = Status(status) + cdef str err = f"{s.name} ({s.value})" + super(CudlaError, self).__init__(err) + + def __reduce__(self): + return (type(self), (self.status,)) + + +@cython.profile(False) +cpdef inline check_status(int status): + if status != 0: + raise CudlaError(status) + + +############################################################################### +# Wrapper functions +############################################################################### + +cpdef uint64_t get_version() except? -1: + cdef uint64_t version + with nogil: + __status__ = cudlaGetVersion(&version) + check_status(__status__) + return version + + +cpdef uint64_t device_get_count() except? 
-1: + cdef uint64_t p_num_devices + with nogil: + __status__ = cudlaDeviceGetCount(&p_num_devices) + check_status(__status__) + return p_num_devices + + +cpdef intptr_t create_device(uint64_t device, uint32_t flags) except *: + cdef DevHandle dev_handle + if flags == CUDLA_STANDALONE: + raise CudlaError(cudlaErrorUnsupportedOperation) + with nogil: + __status__ = cudlaCreateDevice(device, &dev_handle, flags) + check_status(__status__) + return dev_handle + + +cpdef intptr_t mem_register(intptr_t dev_handle, intptr_t ptr, size_t size, uint32_t flags) except *: + cdef uint64_t* dev_ptr + with nogil: + __status__ = cudlaMemRegister(dev_handle, ptr, size, &dev_ptr, flags) + check_status(__status__) + return dev_ptr + + +cpdef intptr_t module_load_from_memory(intptr_t dev_handle, p_module, size_t module_size, uint32_t flags) except *: + cdef void* _p_module_ = get_buffer_pointer(p_module, module_size, readonly=True) + cdef Module h_module + with nogil: + __status__ = cudlaModuleLoadFromMemory(dev_handle, _p_module_, module_size, &h_module, flags) + check_status(__status__) + return h_module + + +cpdef module_unload(intptr_t h_module, uint32_t flags): + with nogil: + __status__ = cudlaModuleUnload(h_module, flags) + check_status(__status__) + + +cpdef submit_task(intptr_t dev_handle, intptr_t ptr_to_tasks, uint32_t num_tasks, intptr_t stream, uint32_t flags): + with nogil: + __status__ = cudlaSubmitTask(dev_handle, ptr_to_tasks, num_tasks, stream, flags) + check_status(__status__) + + +cpdef object device_get_attribute(intptr_t dev_handle, int attrib) except *: + cdef DevAttribute p_attribute_py = DevAttribute() + cdef cudlaDevAttribute *p_attribute = (p_attribute_py._get_ptr()) + with nogil: + __status__ = cudlaDeviceGetAttribute(dev_handle, attrib, p_attribute) + check_status(__status__) + return p_attribute_py + + +cpdef mem_unregister(intptr_t dev_handle, intptr_t dev_ptr): + with nogil: + __status__ = cudlaMemUnregister(dev_handle, dev_ptr) + 
check_status(__status__) + + +cpdef int get_last_error(intptr_t dev_handle) except? 0: + return cudlaGetLastError(dev_handle) + + +cpdef destroy_device(intptr_t dev_handle): + with nogil: + __status__ = cudlaDestroyDevice(dev_handle) + check_status(__status__) + + +cpdef set_task_timeout_in_ms(intptr_t dev_handle, uint32_t timeout): + with nogil: + __status__ = cudlaSetTaskTimeoutInMs(dev_handle, timeout) + check_status(__status__) + + +cpdef module_get_attributes(intptr_t h_module, int attr_type) except *: + """Query module attributes, interpreting the cudlaModuleAttribute union + based on the requested attribute type. + + For count attributes (NUM_INPUT_TENSORS, NUM_OUTPUT_TENSORS, + NUM_OUTPUT_TASK_STATISTICS), returns an int. + + For descriptor attributes (INPUT_TENSOR_DESCRIPTORS, + OUTPUT_TENSOR_DESCRIPTORS, OUTPUT_TASK_STATISTICS_DESCRIPTORS), + returns a list of ModuleTensorDescriptor objects. + """ + cdef int _attr_type = attr_type + cdef cudlaModuleAttribute count_attr + cdef cudlaModuleAttribute num_attr + cdef cudlaModuleAttribute desc_attr + cdef uint32_t count + cdef cudlaModuleTensorDescriptor* desc_buf + cdef uint32_t i + cdef int num_attr_type + + if _attr_type == CUDLA_NUM_INPUT_TENSORS or _attr_type == CUDLA_NUM_OUTPUT_TENSORS or _attr_type == CUDLA_NUM_OUTPUT_TASK_STATISTICS: + with nogil: + __status__ = cudlaModuleGetAttributes(h_module, _attr_type, &count_attr) + check_status(__status__) + return (count_attr.numInputTensors) + elif _attr_type == CUDLA_INPUT_TENSOR_DESCRIPTORS or _attr_type == CUDLA_OUTPUT_TENSOR_DESCRIPTORS or _attr_type == CUDLA_OUTPUT_TASK_STATISTICS_DESCRIPTORS: + if _attr_type == CUDLA_INPUT_TENSOR_DESCRIPTORS: + num_attr_type = CUDLA_NUM_INPUT_TENSORS + elif _attr_type == CUDLA_OUTPUT_TENSOR_DESCRIPTORS: + num_attr_type = CUDLA_NUM_OUTPUT_TENSORS + else: + num_attr_type = CUDLA_NUM_OUTPUT_TASK_STATISTICS + with nogil: + __status__ = cudlaModuleGetAttributes(h_module, num_attr_type, &num_attr) + check_status(__status__) + 
count = num_attr.numInputTensors + desc_buf = malloc(count * sizeof(cudlaModuleTensorDescriptor)) + if desc_buf == NULL: + raise MemoryError("Failed to allocate descriptor buffer") + try: + desc_attr.inputTensorDesc = desc_buf + with nogil: + __status__ = cudlaModuleGetAttributes(h_module, _attr_type, &desc_attr) + check_status(__status__) + result = [] + for i in range(count): + result.append(ModuleTensorDescriptor.from_ptr(&desc_buf[i], readonly=True)) + return result + finally: + free(desc_buf) + else: + raise ValueError(f"Unknown attribute type: {attr_type}") diff --git a/cuda_bindings/cuda/bindings/cycudla.pxd b/cuda_bindings/cuda/bindings/cycudla.pxd new file mode 100644 index 00000000000..42578710de9 --- /dev/null +++ b/cuda_bindings/cuda/bindings/cycudla.pxd @@ -0,0 +1,155 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. +# This layer exposes the C header to Cython as-is. + +from libc.stdint cimport int8_t, int16_t, int32_t, int64_t +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t +from libc.stdint cimport intptr_t, uintptr_t +from libc.stddef cimport size_t + + + + +############################################################################### +# Types (structs, enums, ...) 
+############################################################################### + +# enums +ctypedef enum cudlaStatus "cudlaStatus": + cudlaSuccess "cudlaSuccess" = 0 + cudlaErrorInvalidParam "cudlaErrorInvalidParam" = 1 + cudlaErrorOutOfResources "cudlaErrorOutOfResources" = 2 + cudlaErrorCreationFailed "cudlaErrorCreationFailed" = 3 + cudlaErrorInvalidAddress "cudlaErrorInvalidAddress" = 4 + cudlaErrorOs "cudlaErrorOs" = 5 + cudlaErrorCuda "cudlaErrorCuda" = 6 + cudlaErrorUmd "cudlaErrorUmd" = 7 + cudlaErrorInvalidDevice "cudlaErrorInvalidDevice" = 8 + cudlaErrorInvalidAttribute "cudlaErrorInvalidAttribute" = 9 + cudlaErrorIncompatibleDlaSWVersion "cudlaErrorIncompatibleDlaSWVersion" = 10 + cudlaErrorMemoryRegistered "cudlaErrorMemoryRegistered" = 11 + cudlaErrorInvalidModule "cudlaErrorInvalidModule" = 12 + cudlaErrorUnsupportedOperation "cudlaErrorUnsupportedOperation" = 13 + cudlaErrorNvSci "cudlaErrorNvSci" = 14 + cudlaErrorDlaErrInvalidInput "cudlaErrorDlaErrInvalidInput" = 0x40000001 + cudlaErrorDlaErrInvalidPreAction "cudlaErrorDlaErrInvalidPreAction" = 0x40000002 + cudlaErrorDlaErrNoMem "cudlaErrorDlaErrNoMem" = 0x40000003 + cudlaErrorDlaErrProcessorBusy "cudlaErrorDlaErrProcessorBusy" = 0x40000004 + cudlaErrorDlaErrTaskStatusMismatch "cudlaErrorDlaErrTaskStatusMismatch" = 0x40000005 + cudlaErrorDlaErrEngineTimeout "cudlaErrorDlaErrEngineTimeout" = 0x40000006 + cudlaErrorDlaErrDataMismatch "cudlaErrorDlaErrDataMismatch" = 0x40000007 + cudlaErrorUnknown "cudlaErrorUnknown" = 0x7fffffff + _CUDLASTATUS_INTERNAL_LOADING_ERROR "_CUDLASTATUS_INTERNAL_LOADING_ERROR" = -42 + +ctypedef enum cudlaMode "cudlaMode": + CUDLA_CUDA_DLA "CUDLA_CUDA_DLA" = 0 + CUDLA_STANDALONE "CUDLA_STANDALONE" = 1 + +ctypedef enum cudlaModuleAttributeType "cudlaModuleAttributeType": + CUDLA_NUM_INPUT_TENSORS "CUDLA_NUM_INPUT_TENSORS" = 0 + CUDLA_NUM_OUTPUT_TENSORS "CUDLA_NUM_OUTPUT_TENSORS" = 1 + CUDLA_INPUT_TENSOR_DESCRIPTORS "CUDLA_INPUT_TENSOR_DESCRIPTORS" = 2 + 
CUDLA_OUTPUT_TENSOR_DESCRIPTORS "CUDLA_OUTPUT_TENSOR_DESCRIPTORS" = 3 + CUDLA_NUM_OUTPUT_TASK_STATISTICS "CUDLA_NUM_OUTPUT_TASK_STATISTICS" = 4 + CUDLA_OUTPUT_TASK_STATISTICS_DESCRIPTORS "CUDLA_OUTPUT_TASK_STATISTICS_DESCRIPTORS" = 5 + +ctypedef enum cudlaFenceType "cudlaFenceType": + CUDLA_NVSCISYNC_FENCE "CUDLA_NVSCISYNC_FENCE" = 1 + CUDLA_NVSCISYNC_FENCE_SOF "CUDLA_NVSCISYNC_FENCE_SOF" = 2 + +ctypedef enum cudlaModuleLoadFlags "cudlaModuleLoadFlags": + CUDLA_MODULE_DEFAULT "CUDLA_MODULE_DEFAULT" = 0 + CUDLA_MODULE_ENABLE_FAULT_DIAGNOSTICS "CUDLA_MODULE_ENABLE_FAULT_DIAGNOSTICS" = 1 + +ctypedef enum cudlaSubmissionFlags "cudlaSubmissionFlags": + CUDLA_SUBMIT_NOOP "CUDLA_SUBMIT_NOOP" = 1 + CUDLA_SUBMIT_SKIP_LOCK_ACQUIRE "CUDLA_SUBMIT_SKIP_LOCK_ACQUIRE" = (1 << 1) + CUDLA_SUBMIT_DIAGNOSTICS_TASK "CUDLA_SUBMIT_DIAGNOSTICS_TASK" = (1 << 2) + +ctypedef enum cudlaAccessPermissionFlags "cudlaAccessPermissionFlags": + CUDLA_READ_WRITE_PERM "CUDLA_READ_WRITE_PERM" = 0 + CUDLA_READ_ONLY_PERM "CUDLA_READ_ONLY_PERM" = 1 + CUDLA_TASK_STATISTICS "CUDLA_TASK_STATISTICS" = (1 << 1) + +ctypedef enum cudlaDevAttributeType "cudlaDevAttributeType": + CUDLA_UNIFIED_ADDRESSING "CUDLA_UNIFIED_ADDRESSING" = 0 + CUDLA_DEVICE_VERSION "CUDLA_DEVICE_VERSION" = 1 + +# types +ctypedef void* cudlaDevHandle 'cudlaDevHandle' +ctypedef void* cudlaModule 'cudlaModule' +ctypedef struct cudlaExternalMemoryHandleDesc_t 'cudlaExternalMemoryHandleDesc_t': + void* extBufObject + unsigned long long size + +ctypedef struct cudlaExternalSemaphoreHandleDesc_t 'cudlaExternalSemaphoreHandleDesc_t': + void* extSyncObject + +ctypedef struct cudlaModuleTensorDescriptor 'cudlaModuleTensorDescriptor': + char name[(80U + 1)] + uint64_t size + uint64_t n + uint64_t c + uint64_t h + uint64_t w + uint8_t dataFormat + uint8_t dataType + uint8_t dataCategory + uint8_t pixelFormat + uint8_t pixelMapping + uint32_t stride[8U] + +ctypedef struct CudlaFence 'CudlaFence': + void* fence + cudlaFenceType type + +ctypedef union 
cudlaDevAttribute 'cudlaDevAttribute': + uint8_t unifiedAddressingSupported + uint32_t deviceVersion + +ctypedef union cudlaModuleAttribute 'cudlaModuleAttribute': + uint32_t numInputTensors + uint32_t numOutputTensors + cudlaModuleTensorDescriptor* inputTensorDesc + cudlaModuleTensorDescriptor* outputTensorDesc + +ctypedef struct cudlaWaitEvents 'cudlaWaitEvents': + CudlaFence* preFences + uint32_t numEvents + +ctypedef struct cudlaSignalEvents 'cudlaSignalEvents': + uint64_t** devPtrs + CudlaFence* eofFences + uint32_t numEvents + +ctypedef struct cudlaTask 'cudlaTask': + cudlaModule moduleHandle + uint64_t** outputTensor + uint32_t numOutputTensors + uint32_t numInputTensors + uint64_t** inputTensor + cudlaWaitEvents* waitEvents + cudlaSignalEvents* signalEvents + + +# Typedef aliases for struct types (struct has _t, typedef doesn't) +ctypedef cudlaExternalMemoryHandleDesc_t cudlaExternalMemoryHandleDesc 'cudlaExternalMemoryHandleDesc' +ctypedef cudlaExternalSemaphoreHandleDesc_t cudlaExternalSemaphoreHandleDesc 'cudlaExternalSemaphoreHandleDesc' + + +############################################################################### +# Functions +############################################################################### + +cdef cudlaStatus cudlaGetVersion(uint64_t* const version) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaDeviceGetCount(uint64_t* const pNumDevices) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaCreateDevice(const uint64_t device, cudlaDevHandle* const devHandle, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaMemRegister(const cudlaDevHandle devHandle, const uint64_t* const ptr, const size_t size, uint64_t** const devPtr, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaModuleLoadFromMemory(const cudlaDevHandle devHandle, const uint8_t* const pModule, const size_t moduleSize, cudlaModule* const 
hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaModuleGetAttributes(const cudlaModule hModule, const cudlaModuleAttributeType attrType, cudlaModuleAttribute* const attribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaModuleUnload(const cudlaModule hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaSubmitTask(const cudlaDevHandle devHandle, const cudlaTask* const ptrToTasks, const uint32_t numTasks, void* const stream, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaDeviceGetAttribute(const cudlaDevHandle devHandle, const cudlaDevAttributeType attrib, cudlaDevAttribute* const pAttribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaMemUnregister(const cudlaDevHandle devHandle, const uint64_t* const devPtr) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaGetLastError(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaDestroyDevice(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil +cdef cudlaStatus cudlaSetTaskTimeoutInMs(const cudlaDevHandle devHandle, const uint32_t timeout) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil diff --git a/cuda_bindings/cuda/bindings/cycudla.pyx b/cuda_bindings/cuda/bindings/cycudla.pyx new file mode 100644 index 00000000000..d64e28f847d --- /dev/null +++ b/cuda_bindings/cuda/bindings/cycudla.pyx @@ -0,0 +1,61 @@ +# This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. 
+ +from ._internal cimport cudla as _cudla + + + + +############################################################################### +# Wrapper functions +############################################################################### + +cdef cudlaStatus cudlaGetVersion(uint64_t* const version) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaGetVersion(version) + + +cdef cudlaStatus cudlaDeviceGetCount(uint64_t* const pNumDevices) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaDeviceGetCount(pNumDevices) + + +cdef cudlaStatus cudlaCreateDevice(const uint64_t device, cudlaDevHandle* const devHandle, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaCreateDevice(device, devHandle, flags) + + +cdef cudlaStatus cudlaMemRegister(const cudlaDevHandle devHandle, const uint64_t* const ptr, const size_t size, uint64_t** const devPtr, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaMemRegister(devHandle, ptr, size, devPtr, flags) + + +cdef cudlaStatus cudlaModuleLoadFromMemory(const cudlaDevHandle devHandle, const uint8_t* const pModule, const size_t moduleSize, cudlaModule* const hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaModuleLoadFromMemory(devHandle, pModule, moduleSize, hModule, flags) + + +cdef cudlaStatus cudlaModuleGetAttributes(const cudlaModule hModule, const cudlaModuleAttributeType attrType, cudlaModuleAttribute* const attribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaModuleGetAttributes(hModule, attrType, attribute) + + +cdef cudlaStatus cudlaModuleUnload(const cudlaModule hModule, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaModuleUnload(hModule, flags) + + +cdef cudlaStatus cudlaSubmitTask(const cudlaDevHandle devHandle, const cudlaTask* const ptrToTasks, const uint32_t numTasks, void* 
const stream, const uint32_t flags) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaSubmitTask(devHandle, ptrToTasks, numTasks, stream, flags) + + +cdef cudlaStatus cudlaDeviceGetAttribute(const cudlaDevHandle devHandle, const cudlaDevAttributeType attrib, cudlaDevAttribute* const pAttribute) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaDeviceGetAttribute(devHandle, attrib, pAttribute) + + +cdef cudlaStatus cudlaMemUnregister(const cudlaDevHandle devHandle, const uint64_t* const devPtr) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaMemUnregister(devHandle, devPtr) + + +cdef cudlaStatus cudlaGetLastError(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaGetLastError(devHandle) + + +cdef cudlaStatus cudlaDestroyDevice(const cudlaDevHandle devHandle) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaDestroyDevice(devHandle) + + +cdef cudlaStatus cudlaSetTaskTimeoutInMs(const cudlaDevHandle devHandle, const uint32_t timeout) except?_CUDLASTATUS_INTERNAL_LOADING_ERROR nogil: + return _cudla._cudlaSetTaskTimeoutInMs(devHandle, timeout) diff --git a/cuda_bindings/docs/source/api.rst b/cuda_bindings/docs/source/api.rst index a47b65a9297..07eef44252f 100644 --- a/cuda_bindings/docs/source/api.rst +++ b/cuda_bindings/docs/source/api.rst @@ -15,6 +15,7 @@ CUDA Python API Reference module/nvjitlink module/nvvm module/nvfatbin + module/cudla module/cufile module/nvml module/utils diff --git a/cuda_bindings/docs/source/module/cudla.rst b/cuda_bindings/docs/source/module/cudla.rst new file mode 100644 index 00000000000..e1eb720568e --- /dev/null +++ b/cuda_bindings/docs/source/module/cudla.rst @@ -0,0 +1,67 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +.. 
default-role:: cpp:any + +cudla +===== + +Note +---- + +The cuDLA bindings require a Jetson platform with DLA hardware (Xavier or Orin). +cuDLA is not available on desktop GPUs. + +Functions +--------- + +cuDLA defines the following functions for DLA device management and inference. + +.. autofunction:: cuda.bindings.cudla.get_version +.. autofunction:: cuda.bindings.cudla.device_get_count +.. autofunction:: cuda.bindings.cudla.create_device +.. autofunction:: cuda.bindings.cudla.destroy_device +.. autofunction:: cuda.bindings.cudla.mem_register +.. autofunction:: cuda.bindings.cudla.mem_unregister +.. autofunction:: cuda.bindings.cudla.module_load_from_memory +.. autofunction:: cuda.bindings.cudla.module_get_attributes +.. autofunction:: cuda.bindings.cudla.module_unload +.. autofunction:: cuda.bindings.cudla.submit_task +.. autofunction:: cuda.bindings.cudla.device_get_attribute +.. autofunction:: cuda.bindings.cudla.get_last_error +.. autofunction:: cuda.bindings.cudla.import_external_memory +.. autofunction:: cuda.bindings.cudla.import_external_semaphore +.. autofunction:: cuda.bindings.cudla.get_nv_sci_sync_attributes +.. autofunction:: cuda.bindings.cudla.set_task_timeout_in_ms + +Types +----- + +.. autoclass:: cuda.bindings.cudla.ExternalMemoryHandleDesc +.. autoclass:: cuda.bindings.cudla.ExternalSemaphoreHandleDesc +.. autoclass:: cuda.bindings.cudla.ModuleTensorDescriptor +.. autoclass:: cuda.bindings.cudla.Fence +.. autoclass:: cuda.bindings.cudla.DevAttribute +.. autoclass:: cuda.bindings.cudla.ModuleAttribute +.. autoclass:: cuda.bindings.cudla.WaitEvents +.. autoclass:: cuda.bindings.cudla.SignalEvents +.. autoclass:: cuda.bindings.cudla.Task + +Enums +----- + +.. autoclass:: cuda.bindings.cudla.Status + + .. autoattribute:: cuda.bindings.cudla.Status.SUCCESS + +.. autoclass:: cuda.bindings.cudla.Mode + + .. autoattribute:: cuda.bindings.cudla.Mode.CUDA_DLA + .. autoattribute:: cuda.bindings.cudla.Mode.STANDALONE + +.. 
autoclass:: cuda.bindings.cudla.ModuleAttributeType +.. autoclass:: cuda.bindings.cudla.FenceType +.. autoclass:: cuda.bindings.cudla.ModuleLoadFlags +.. autoclass:: cuda.bindings.cudla.SubmissionFlags +.. autoclass:: cuda.bindings.cudla.AccessPermissionFlags +.. autoclass:: cuda.bindings.cudla.DevAttributeType diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml index d72ab7f7750..bac72f5b9be 100644 --- a/cuda_bindings/pyproject.toml +++ b/cuda_bindings/pyproject.toml @@ -36,7 +36,7 @@ dependencies = ["cuda-pathfinder >=1.4.2"] [project.optional-dependencies] all = [ - "cuda-toolkit[nvrtc,nvjitlink,nvvm,nvfatbin]==13.*", + "cuda-toolkit[nvrtc,nvjitlink,nvvm,nvfatbin,cudla]==13.*", "cuda-toolkit[cufile]==13.*; sys_platform == 'linux'", ] diff --git a/cuda_bindings/tests/cudla/conftest.py b/cuda_bindings/tests/cudla/conftest.py new file mode 100644 index 00000000000..2d08058f735 --- /dev/null +++ b/cuda_bindings/tests/cudla/conftest.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def ctx(): + """Override the parent conftest's ``ctx`` fixture which creates a CUDA + context. cuDLA tests do not require a CUDA context, so this no-op + prevents ``cuInit`` / ``cuCtxCreate`` from running (and failing on + machines without a CUDA-capable GPU).""" + yield None diff --git a/cuda_bindings/tests/cudla/test_cudla_bindings.py b/cuda_bindings/tests/cudla/test_cudla_bindings.py new file mode 100644 index 00000000000..4caca695840 --- /dev/null +++ b/cuda_bindings/tests/cudla/test_cudla_bindings.py @@ -0,0 +1,305 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +import pytest + +cudla = pytest.importorskip("cuda.bindings.cudla") + + +def _cudla_library_available(): + """Check if the cuDLA shared library is loaded and usable.""" + try: + from cuda.bindings._internal import cudla as _inner + + return _inner._inspect_function_pointer("__cudlaGetVersion") != 0 + except Exception: + return False + + +requires_cudla_library = pytest.mark.skipif( + not _cudla_library_available(), + reason="cuDLA library not available (requires NVIDIA Orin with DLA)", +) + + +# --------------------------------------------------------------------------- +# Enum tests (always run -- no library needed) +# --------------------------------------------------------------------------- + + +class TestEnums: + def test_status_values(self): + assert cudla.Status.Success == 0 + assert cudla.Status.ErrorInvalidParam == 1 + assert cudla.Status.ErrorOutOfResources == 2 + assert cudla.Status.ErrorCreationFailed == 3 + assert cudla.Status.ErrorInvalidAddress == 4 + assert cudla.Status.ErrorOs == 5 + assert cudla.Status.ErrorCuda == 6 + assert cudla.Status.ErrorUmd == 7 + assert cudla.Status.ErrorInvalidDevice == 8 + assert cudla.Status.ErrorInvalidAttribute == 9 + assert cudla.Status.ErrorIncompatibleDlaSWVersion == 10 + assert cudla.Status.ErrorMemoryRegistered == 11 + assert cudla.Status.ErrorInvalidModule == 12 + assert cudla.Status.ErrorUnsupportedOperation == 13 + assert cudla.Status.ErrorNvSci == 14 + assert cudla.Status.ErrorDlaErrInvalidInput == 0x40000001 + assert cudla.Status.ErrorDlaErrInvalidPreAction == 0x40000002 + assert cudla.Status.ErrorDlaErrNoMem == 0x40000003 + assert cudla.Status.ErrorDlaErrProcessorBusy == 0x40000004 + assert cudla.Status.ErrorDlaErrTaskStatusMismatch == 0x40000005 + assert cudla.Status.ErrorDlaErrEngineTimeout == 0x40000006 + assert cudla.Status.ErrorDlaErrDataMismatch == 0x40000007 + assert cudla.Status.ErrorUnknown == 0x7FFFFFFF + + def 
test_status_member_count(self): + assert len(cudla.Status) == 23 + + def test_mode_values(self): + assert cudla.Mode.CUDA_DLA == 0 + assert cudla.Mode.STANDALONE == 1 + + def test_module_attribute_type_values(self): + assert cudla.ModuleAttributeType.NUM_INPUT_TENSORS == 0 + assert cudla.ModuleAttributeType.NUM_OUTPUT_TENSORS == 1 + assert cudla.ModuleAttributeType.INPUT_TENSOR_DESCRIPTORS == 2 + assert cudla.ModuleAttributeType.OUTPUT_TENSOR_DESCRIPTORS == 3 + assert cudla.ModuleAttributeType.NUM_OUTPUT_TASK_STATISTICS == 4 + assert cudla.ModuleAttributeType.OUTPUT_TASK_STATISTICS_DESCRIPTORS == 5 + + def test_fence_type_values(self): + assert cudla.FenceType.NVSCISYNC_FENCE == 1 + assert cudla.FenceType.NVSCISYNC_FENCE_SOF == 2 + + def test_module_load_flags(self): + assert cudla.ModuleLoadFlags.MODULE_DEFAULT == 0 + assert cudla.ModuleLoadFlags.MODULE_ENABLE_FAULT_DIAGNOSTICS == 1 + + def test_submission_flags(self): + assert cudla.SubmissionFlags.SUBMIT_NOOP == 1 + assert cudla.SubmissionFlags.SUBMIT_SKIP_LOCK_ACQUIRE == 2 + assert cudla.SubmissionFlags.SUBMIT_DIAGNOSTICS_TASK == 4 + + def test_access_permission_flags(self): + assert cudla.AccessPermissionFlags.READ_WRITE_PERM == 0 + assert cudla.AccessPermissionFlags.READ_ONLY_PERM == 1 + assert cudla.AccessPermissionFlags.TASK_STATISTICS == 2 + + def test_dev_attribute_type(self): + assert cudla.DevAttributeType.UNIFIED_ADDRESSING == 0 + assert cudla.DevAttributeType.DEVICE_VERSION == 1 + + +# --------------------------------------------------------------------------- +# POD type tests (always run -- no library needed) +# --------------------------------------------------------------------------- + + +class TestPodTypes: + def test_external_memory_handle_desc(self): + desc = cudla.ExternalMemoryHandleDesc() + desc.size_ = 4096 + assert desc.size_ == 4096 + desc.ext_buf_object = 0xABCD + assert desc.ext_buf_object == 0xABCD + + def test_external_semaphore_handle_desc(self): + desc = 
cudla.ExternalSemaphoreHandleDesc() + desc.ext_sync_object = 0x1234 + assert desc.ext_sync_object == 0x1234 + + def test_module_tensor_descriptor_fields(self): + desc = cudla.ModuleTensorDescriptor() + assert desc.size_ == 0 + assert desc.n == 0 + assert desc.c == 0 + assert desc.h == 0 + assert desc.w == 0 + assert desc.data_format == 0 + assert desc.data_type == 0 + assert desc.data_category == 0 + assert desc.pixel_format == 0 + assert desc.pixel_mapping == 0 + + def test_module_tensor_descriptor_name(self): + desc = cudla.ModuleTensorDescriptor() + name = desc.name + assert isinstance(name, (str, bytes)) + + def test_module_tensor_descriptor_stride(self): + desc = cudla.ModuleTensorDescriptor() + stride = desc.stride + assert len(stride) == 8 + + def test_fence(self): + fence = cudla.Fence() + fence.fence = 0xBEEF + assert fence.fence == 0xBEEF + fence.type = int(cudla.FenceType.NVSCISYNC_FENCE) + assert fence.type == 1 + + def test_dev_attribute(self): + attr = cudla.DevAttribute() + assert attr.unified_addressing_supported == 0 + assert attr.device_version == 0 + attr.unified_addressing_supported = 1 + assert attr.unified_addressing_supported == 1 + attr.device_version = 0x20 + assert attr.device_version == 0x20 + + def test_module_attribute(self): + attr = cudla.ModuleAttribute() + assert attr.num_input_tensors == 0 + assert attr.num_output_tensors == 0 + attr.num_input_tensors = 3 + assert attr.num_input_tensors == 3 + attr.num_output_tensors = 1 + assert attr.num_output_tensors == 1 + + def test_wait_events(self): + we = cudla.WaitEvents() + assert we.pre_fences == [] + + def test_signal_events(self): + se = cudla.SignalEvents() + assert se.eof_fences == [] + + def test_task_construction(self): + task = cudla.Task() + task.module_handle = 0xDEAD + assert task.module_handle == 0xDEAD + + def test_task_input_tensor_auto_size(self): + task = cudla.Task() + task.input_tensor = [0x1000, 0x2000, 0x3000] + assert len(task.input_tensor) == 3 + + def 
test_task_output_tensor_auto_size(self): + task = cudla.Task() + task.output_tensor = [0x4000] + assert len(task.output_tensor) == 1 + + def test_task_combined(self): + task = cudla.Task() + task.module_handle = 0xABCD + task.input_tensor = [0x1000, 0x2000] + task.output_tensor = [0x3000] + task.wait_events = 0 + task.signal_events = 0 + assert task.module_handle == 0xABCD + assert len(task.input_tensor) == 2 + assert len(task.output_tensor) == 1 + + def test_pod_ptr_is_nonzero(self): + """Verify that int(pod) returns a nonzero pointer (memory is allocated).""" + task = cudla.Task() + assert int(task) != 0 + desc = cudla.ModuleTensorDescriptor() + assert int(desc) != 0 + + +# --------------------------------------------------------------------------- +# Error type tests (always run -- no library needed) +# --------------------------------------------------------------------------- + + +class TestErrorType: + def test_cudla_error_is_exception(self): + assert issubclass(cudla.CudlaError, Exception) + + def test_cudla_error_stores_status(self): + err = cudla.CudlaError(int(cudla.Status.ErrorInvalidParam)) + assert err.status == int(cudla.Status.ErrorInvalidParam) + + def test_cudla_error_str(self): + err = cudla.CudlaError(int(cudla.Status.ErrorInvalidParam)) + assert "ErrorInvalidParam" in str(err) + + +# --------------------------------------------------------------------------- +# API surface tests (always run -- no library needed) +# --------------------------------------------------------------------------- + + +class TestApiSurface: + """Verify that all expected functions exist as callable attributes.""" + + @pytest.mark.parametrize( + "func_name", + [ + "get_version", + "device_get_count", + "create_device", + "destroy_device", + "mem_register", + "mem_unregister", + "module_load_from_memory", + "module_get_attributes", + "module_unload", + "submit_task", + "device_get_attribute", + "get_last_error", + "set_task_timeout_in_ms", + ], + ) + def 
test_function_exists(self, func_name): + assert callable(getattr(cudla, func_name)) + + +# --------------------------------------------------------------------------- +# Function tests (hardware-gated -- skipped when libcudla.so is unavailable) +# --------------------------------------------------------------------------- + + +@requires_cudla_library +class TestFunctions: + def test_get_version(self): + version = cudla.get_version() + assert version > 0 + + def test_device_get_count(self): + count = cudla.device_get_count() + assert count >= 0 + + def test_create_destroy_device(self): + from cuda.bindings import driver, runtime + + driver.cuInit(0) + runtime.cudaSetDevice(0) + + handle = cudla.create_device(0, int(cudla.Mode.CUDA_DLA)) + try: + assert handle != 0 + finally: + cudla.destroy_device(handle) + + def test_create_device_rejects_standalone(self): + with pytest.raises(cudla.CudlaError, match="ErrorUnsupportedOperation"): + cudla.create_device(0, int(cudla.Mode.STANDALONE)) + + def test_create_device_rejects_standalone_raw_int(self): + with pytest.raises(cudla.CudlaError, match="ErrorUnsupportedOperation"): + cudla.create_device(0, 1) + + def test_mem_register_unregister(self): + from cuda.bindings import driver, runtime + + driver.cuInit(0) + runtime.cudaSetDevice(0) + + dev_handle = cudla.create_device(0, int(cudla.Mode.CUDA_DLA)) + try: + buf_size = 1024 + err, gpu_ptr = runtime.cudaMalloc(buf_size) + assert err.value == 0, f"cudaMalloc failed: {err}" + try: + registered_ptr = cudla.mem_register( + dev_handle, int(gpu_ptr), buf_size, 0 + ) + assert registered_ptr != 0 + cudla.mem_unregister(dev_handle, registered_ptr) + finally: + runtime.cudaFree(gpu_ptr) + finally: + cudla.destroy_device(dev_handle) From dcd089e4ec0b04571b845e2f4fbbe205e82488ca Mon Sep 17 00:00:00 2001 From: Nikshay Shrivastava Date: Wed, 6 May 2026 07:29:34 -0700 Subject: [PATCH 2/4] fixup: ruff lint fixes --- cuda_bindings/tests/cudla/conftest.py | 2 +- 
cuda_bindings/tests/cudla/test_cudla_bindings.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/tests/cudla/conftest.py b/cuda_bindings/tests/cudla/conftest.py index 2d08058f735..c22097fce97 100644 --- a/cuda_bindings/tests/cudla/conftest.py +++ b/cuda_bindings/tests/cudla/conftest.py @@ -10,4 +10,4 @@ def ctx(): context. cuDLA tests do not require a CUDA context, so this no-op prevents ``cuInit`` / ``cuCtxCreate`` from running (and failing on machines without a CUDA-capable GPU).""" - yield None + return None diff --git a/cuda_bindings/tests/cudla/test_cudla_bindings.py b/cuda_bindings/tests/cudla/test_cudla_bindings.py index 4caca695840..4a756329d75 100644 --- a/cuda_bindings/tests/cudla/test_cudla_bindings.py +++ b/cuda_bindings/tests/cudla/test_cudla_bindings.py @@ -294,9 +294,7 @@ def test_mem_register_unregister(self): err, gpu_ptr = runtime.cudaMalloc(buf_size) assert err.value == 0, f"cudaMalloc failed: {err}" try: - registered_ptr = cudla.mem_register( - dev_handle, int(gpu_ptr), buf_size, 0 - ) + registered_ptr = cudla.mem_register(dev_handle, int(gpu_ptr), buf_size, 0) assert registered_ptr != 0 cudla.mem_unregister(dev_handle, registered_ptr) finally: From 285eeb59d548799dbdf341bf55695bd9ef2b6f55 Mon Sep 17 00:00:00 2001 From: Nikshay Shrivastava Date: Wed, 6 May 2026 11:11:37 -0700 Subject: [PATCH 3/4] Add SPDX license headers to cuDLA binding files --- cuda_bindings/cuda/bindings/_internal/cudla.pxd | 4 ++++ cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx | 4 ++++ cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx | 4 ++++ cuda_bindings/cuda/bindings/cudla.pxd | 4 ++++ cuda_bindings/cuda/bindings/cudla.pyx | 7 ++++--- cuda_bindings/cuda/bindings/cycudla.pxd | 7 ++++++- cuda_bindings/cuda/bindings/cycudla.pyx | 4 ++++ 7 files changed, 30 insertions(+), 4 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/cudla.pxd b/cuda_bindings/cuda/bindings/_internal/cudla.pxd index 
57fef6a7323..2cbdc706f5a 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla.pxd +++ b/cuda_bindings/cuda/bindings/_internal/cudla.pxd @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from ..cycudla cimport * diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx index 6a0f4f2984b..10202ec3e36 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx index 2cddfd6d19e..5baf53efb45 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. 
from libc.stdint cimport intptr_t diff --git a/cuda_bindings/cuda/bindings/cudla.pxd b/cuda_bindings/cuda/bindings/cudla.pxd index 09b55c579b6..484e17bd50b 100644 --- a/cuda_bindings/cuda/bindings/cudla.pxd +++ b/cuda_bindings/cuda/bindings/cudla.pxd @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from libc.stdint cimport intptr_t diff --git a/cuda_bindings/cuda/bindings/cudla.pyx b/cuda_bindings/cuda/bindings/cudla.pyx index c11b2494e1b..1ade8672599 100644 --- a/cuda_bindings/cuda/bindings/cudla.pyx +++ b/cuda_bindings/cuda/bindings/cudla.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. cimport cython # NOQA @@ -761,7 +765,6 @@ dev_attribute_dtype = _numpy.dtype(( } )) - cdef class DevAttribute: """Empty-initialize an instance of `cudlaDevAttribute`. @@ -901,7 +904,6 @@ module_attribute_dtype = _numpy.dtype(( } )) - cdef class ModuleAttribute: """Empty-initialize an instance of `cudlaModuleAttribute`. @@ -1571,7 +1573,6 @@ cdef class Task: return obj - ############################################################################### # Enum ############################################################################### diff --git a/cuda_bindings/cuda/bindings/cycudla.pxd b/cuda_bindings/cuda/bindings/cycudla.pxd index 42578710de9..50410519246 100644 --- a/cuda_bindings/cuda/bindings/cycudla.pxd +++ b/cuda_bindings/cuda/bindings/cycudla.pxd @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. # This layer exposes the C header to Cython as-is. @@ -76,7 +80,9 @@ ctypedef enum cudlaDevAttributeType "cudlaDevAttributeType": # types ctypedef void* cudlaDevHandle 'cudlaDevHandle' + ctypedef void* cudlaModule 'cudlaModule' + ctypedef struct cudlaExternalMemoryHandleDesc_t 'cudlaExternalMemoryHandleDesc_t': void* extBufObject unsigned long long size @@ -130,7 +136,6 @@ ctypedef struct cudlaTask 'cudlaTask': cudlaWaitEvents* waitEvents cudlaSignalEvents* signalEvents - # Typedef aliases for struct types (struct has _t, typedef doesn't) ctypedef cudlaExternalMemoryHandleDesc_t cudlaExternalMemoryHandleDesc 'cudlaExternalMemoryHandleDesc' ctypedef cudlaExternalSemaphoreHandleDesc_t cudlaExternalSemaphoreHandleDesc 'cudlaExternalSemaphoreHandleDesc' diff --git a/cuda_bindings/cuda/bindings/cycudla.pyx b/cuda_bindings/cuda/bindings/cycudla.pyx index d64e28f847d..7fd88c17648 100644 --- a/cuda_bindings/cuda/bindings/cycudla.pyx +++ b/cuda_bindings/cuda/bindings/cycudla.pyx @@ -1,3 +1,7 @@ +# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +# # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. 
from ._internal cimport cudla as _cudla From 04de604cbc386a98ca6fe5efdd4dd11bfe6ff543 Mon Sep 17 00:00:00 2001 From: Nikshay Shrivastava Date: Wed, 6 May 2026 11:27:05 -0700 Subject: [PATCH 4/4] fixed SPDX license headers format --- cuda_bindings/cuda/bindings/_internal/cudla.pxd | 5 ++--- cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx | 5 ++--- cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx | 5 ++--- cuda_bindings/cuda/bindings/cudla.pxd | 5 ++--- cuda_bindings/cuda/bindings/cudla.pyx | 5 ++--- cuda_bindings/cuda/bindings/cycudla.pxd | 5 ++--- cuda_bindings/cuda/bindings/cycudla.pyx | 5 ++--- 7 files changed, 14 insertions(+), 21 deletions(-) diff --git a/cuda_bindings/cuda/bindings/_internal/cudla.pxd b/cuda_bindings/cuda/bindings/_internal/cudla.pxd index 2cbdc706f5a..beca59f3e6b 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla.pxd +++ b/cuda_bindings/cuda/bindings/_internal/cudla.pxd @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from ..cycudla cimport * diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx index 10202ec3e36..ccc53f32ca8 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cudla_linux.pyx @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from libc.stdint cimport intptr_t, uintptr_t diff --git a/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx index 5baf53efb45..2d91c16ee99 100644 --- a/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx +++ b/cuda_bindings/cuda/bindings/_internal/cudla_windows.pyx @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from libc.stdint cimport intptr_t diff --git a/cuda_bindings/cuda/bindings/cudla.pxd b/cuda_bindings/cuda/bindings/cudla.pxd index 484e17bd50b..786622ac9a9 100644 --- a/cuda_bindings/cuda/bindings/cudla.pxd +++ b/cuda_bindings/cuda/bindings/cudla.pxd @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from libc.stdint cimport intptr_t diff --git a/cuda_bindings/cuda/bindings/cudla.pyx b/cuda_bindings/cuda/bindings/cudla.pyx index 1ade8672599..ff7569b9b7a 100644 --- a/cuda_bindings/cuda/bindings/cudla.pyx +++ b/cuda_bindings/cuda/bindings/cudla.pyx @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
-# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. cimport cython # NOQA diff --git a/cuda_bindings/cuda/bindings/cycudla.pxd b/cuda_bindings/cuda/bindings/cycudla.pxd index 50410519246..5bcbe623469 100644 --- a/cuda_bindings/cuda/bindings/cycudla.pxd +++ b/cuda_bindings/cuda/bindings/cycudla.pxd @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. # This layer exposes the C header to Cython as-is. diff --git a/cuda_bindings/cuda/bindings/cycudla.pyx b/cuda_bindings/cuda/bindings/cycudla.pyx index 7fd88c17648..8d0cbdc5111 100644 --- a/cuda_bindings/cuda/bindings/cycudla.pyx +++ b/cuda_bindings/cuda/bindings/cycudla.pyx @@ -1,7 +1,6 @@ -# Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. -# +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# + # This code was automatically generated with version 1.5.0, generator version 0.3.1.dev1465+gc5c5c8652. Do not modify it directly. from ._internal cimport cudla as _cudla