diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in index d97fba967a..7a132b5769 100644 --- a/cuda_bindings/cuda/bindings/driver.pyx.in +++ b/cuda_bindings/cuda/bindings/driver.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 13.2.0, generator version 0.3.1.dev1568+g289771de9.d20260413. Do not modify it directly. +# This code was automatically generated with version 13.2.0, generator version 0.3.1.dev1630+gadce055ea.d20260422. Do not modify it directly. from typing import Any, Optional import cython import ctypes @@ -40589,9 +40589,13 @@ def cuStreamGetCaptureInfo(hStream): with nogil: err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) if CUresult(err) == CUresult(0): - pydependencies_out = [CUgraphNode(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] + pydependencies_out = [CUgraphNode() for _ in range(numDependencies_out)] + for idx in range(numDependencies_out): + string.memcpy((pydependencies_out[idx])._pvt_ptr, &cydependencies_out[idx], sizeof(cydriver.CUgraphNode)) if CUresult(err) == CUresult(0): - pyedgeData_out = [CUgraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] + pyedgeData_out = [CUgraphEdgeData() for _ in range(numDependencies_out)] + for idx in range(numDependencies_out): + string.memcpy((pyedgeData_out[idx])._pvt_ptr, &cyedgeData_out[idx], sizeof(cydriver.CUgraphEdgeData)) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None, None, None, None) return (_CUresult_SUCCESS, CUstreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out) @@ -47195,7 +47199,9 @@ def cuGraphGetNodes(hGraph, size_t numNodes = 0): with nogil: err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes) if CUresult(err) == CUresult(0): - pynodes = [CUgraphNode(init_value=cynodes[idx]) for idx in range(_graph_length)] + pynodes = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pynodes[idx])._pvt_ptr, &cynodes[idx], sizeof(cydriver.CUgraphNode)) if cynodes is not NULL: free(cynodes) if err != cydriver.CUDA_SUCCESS: @@ -47254,7 +47260,9 @@ def cuGraphGetRootNodes(hGraph, size_t numRootNodes = 0): with nogil: err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes) if CUresult(err) == CUresult(0): - pyrootNodes = [CUgraphNode(init_value=cyrootNodes[idx]) for idx in range(_graph_length)] + pyrootNodes = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyrootNodes[idx])._pvt_ptr, &cyrootNodes[idx], sizeof(cydriver.CUgraphNode)) if cyrootNodes is not NULL: free(cyrootNodes) if err != cydriver.CUDA_SUCCESS: @@ -47336,15 +47344,21 @@ def cuGraphGetEdges(hGraph, size_t numEdges = 0): with nogil: err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges) if CUresult(err) == CUresult(0): - pyfrom_ = [CUgraphNode(init_value=cyfrom_[idx]) for idx in range(_graph_length)] + pyfrom_ = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyfrom_[idx])._pvt_ptr, &cyfrom_[idx], sizeof(cydriver.CUgraphNode)) if cyfrom_ is not NULL: free(cyfrom_) if CUresult(err) == CUresult(0): - pyto = [CUgraphNode(init_value=cyto[idx]) for idx in range(_graph_length)] + pyto = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyto[idx])._pvt_ptr, &cyto[idx], sizeof(cydriver.CUgraphNode)) if cyto is not NULL: free(cyto) if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cydriver.CUDA_SUCCESS: @@ -47417,11 +47431,15 @@ def cuGraphNodeGetDependencies(hNode, size_t numDependencies = 0): with nogil: err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, cyedgeData, &numDependencies) if CUresult(err) == CUresult(0): - pydependencies = [CUgraphNode(init_value=cydependencies[idx]) for idx in range(_graph_length)] + pydependencies = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pydependencies[idx])._pvt_ptr, &cydependencies[idx], sizeof(cydriver.CUgraphNode)) if cydependencies is not NULL: free(cydependencies) if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cydriver.CUDA_SUCCESS: @@ -47494,11 +47512,15 @@ def cuGraphNodeGetDependentNodes(hNode, size_t numDependentNodes = 0): with nogil: err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes) if CUresult(err) == CUresult(0): - pydependentNodes = [CUgraphNode(init_value=cydependentNodes[idx]) for idx in range(_graph_length)] + pydependentNodes = [CUgraphNode() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pydependentNodes[idx])._pvt_ptr, &cydependentNodes[idx], sizeof(cydriver.CUgraphNode)) if cydependentNodes is not NULL: free(cydependentNodes) if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [CUgraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cydriver.CUgraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cydriver.CUDA_SUCCESS: diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in index 31e29c4dc8..894c9c0965 100644 --- a/cuda_bindings/cuda/bindings/runtime.pyx.in +++ b/cuda_bindings/cuda/bindings/runtime.pyx.in @@ -24290,9 +24290,13 @@ def cudaStreamGetCaptureInfo(stream): with nogil: err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) if cudaError_t(err) == cudaError_t(0): - pydependencies_out = [cudaGraphNode_t(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] + pydependencies_out = [cudaGraphNode_t() for _ in range(numDependencies_out)] + for idx in range(numDependencies_out): + string.memcpy((pydependencies_out[idx])._pvt_ptr, &cydependencies_out[idx], sizeof(cyruntime.cudaGraphNode_t)) if cudaError_t(err) == cudaError_t(0): - pyedgeData_out = [cudaGraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] + pyedgeData_out = [cudaGraphEdgeData() for _ in range(numDependencies_out)] + for idx in range(numDependencies_out): + string.memcpy((pyedgeData_out[idx])._pvt_ptr, &cyedgeData_out[idx], sizeof(cyruntime.cudaGraphEdgeData)) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None, None, None, None) return (_cudaError_t_SUCCESS, cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out) @@ -35805,7 +35809,9 @@ def cudaGraphGetNodes(graph, size_t numNodes = 0): with nogil: err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes) if cudaError_t(err) == cudaError_t(0): - pynodes = [cudaGraphNode_t(init_value=cynodes[idx]) for idx in range(_graph_length)] + pynodes = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pynodes[idx])._pvt_ptr, &cynodes[idx], sizeof(cyruntime.cudaGraphNode_t)) if cynodes is not NULL: free(cynodes) if err != cyruntime.cudaSuccess: @@ -35864,7 +35870,9 @@ def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0): with nogil: err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes) if cudaError_t(err) == cudaError_t(0): - pypRootNodes = [cudaGraphNode_t(init_value=cypRootNodes[idx]) for idx in range(_graph_length)] + pypRootNodes = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pypRootNodes[idx])._pvt_ptr, &cypRootNodes[idx], sizeof(cyruntime.cudaGraphNode_t)) if cypRootNodes is not NULL: free(cypRootNodes) if err != cyruntime.cudaSuccess: @@ -35946,15 +35954,21 @@ def cudaGraphGetEdges(graph, size_t numEdges = 0): with nogil: err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges) if cudaError_t(err) == cudaError_t(0): - pyfrom_ = [cudaGraphNode_t(init_value=cyfrom_[idx]) for idx in range(_graph_length)] + pyfrom_ = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyfrom_[idx])._pvt_ptr, &cyfrom_[idx], sizeof(cyruntime.cudaGraphNode_t)) if cyfrom_ is not NULL: free(cyfrom_) if cudaError_t(err) == cudaError_t(0): - pyto = [cudaGraphNode_t(init_value=cyto[idx]) for idx in range(_graph_length)] + pyto = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyto[idx])._pvt_ptr, &cyto[idx], sizeof(cyruntime.cudaGraphNode_t)) if cyto is not NULL: free(cyto) if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cyruntime.cudaSuccess: @@ -36027,11 +36041,15 @@ def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0): with nogil: err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies) if cudaError_t(err) == cudaError_t(0): - pypDependencies = [cudaGraphNode_t(init_value=cypDependencies[idx]) for idx in range(_graph_length)] + pypDependencies = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pypDependencies[idx])._pvt_ptr, &cypDependencies[idx], sizeof(cyruntime.cudaGraphNode_t)) if cypDependencies is not NULL: free(cypDependencies) if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cyruntime.cudaSuccess: @@ -36104,11 +36122,15 @@ def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0): with nogil: err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes) if cudaError_t(err) == cudaError_t(0): - pypDependentNodes = [cudaGraphNode_t(init_value=cypDependentNodes[idx]) for idx in range(_graph_length)] + pypDependentNodes = [cudaGraphNode_t() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pypDependentNodes[idx])._pvt_ptr, &cypDependentNodes[idx], sizeof(cyruntime.cudaGraphNode_t)) if cypDependentNodes is not NULL: free(cypDependentNodes) if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + pyedgeData = [cudaGraphEdgeData() for _ in range(_graph_length)] + for idx in range(_graph_length): + string.memcpy((pyedgeData[idx])._pvt_ptr, &cyedgeData[idx], sizeof(cyruntime.cudaGraphEdgeData)) if cyedgeData is not NULL: free(cyedgeData) if err != cyruntime.cudaSuccess: diff --git a/cuda_bindings/docs/source/release/13.3.0-notes.rst b/cuda_bindings/docs/source/release/13.3.0-notes.rst new file mode 100644 index 0000000000..3189654880 --- /dev/null +++ b/cuda_bindings/docs/source/release/13.3.0-notes.rst @@ -0,0 +1,34 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE + +.. module:: cuda.bindings + +``cuda-bindings`` 13.3.0 Release notes +====================================== + +Highlights +---------- + + +Bugfixes +-------- + +* Fixed a use-after-free in ``cudaGraphGetEdges``, ``cudaGraphNodeGetDependencies``, + ``cudaGraphNodeGetDependentNodes``, ``cudaStreamGetCaptureInfo``, and their + driver-API counterparts (``cuGraphGetEdges``, ``cuGraphNodeGetDependencies``, + ``cuGraphNodeGetDependentNodes``, ``cuStreamGetCaptureInfo``). The returned + ``cudaGraphEdgeData``/``CUgraphEdgeData`` wrappers were backed by a scratch + buffer that was freed before the call returned, leaving every wrapper holding + a dangling pointer. The returned wrappers now own deep copies of the edge + data. + (`Issue #1804 `_) + + +Miscellaneous +------------- + + +Known issues +------------ + +* Updating from older versions (v12.6.2.post1 and below) via ``pip install -U cuda-python`` might not work. Please do a clean re-installation by uninstalling ``pip uninstall -y cuda-python`` followed by installing ``pip install cuda-python``. diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py index 91fbe93827..9a793be880 100644 --- a/cuda_bindings/tests/test_cuda.py +++ b/cuda_bindings/tests/test_cuda.py @@ -964,6 +964,87 @@ def test_cuGraphExecGetId(device, ctx): assert err == cuda.CUresult.CUDA_SUCCESS +def test_cuGraphGetEdges_edgeData_outlives_call(device, ctx): + # Regression test for https://github.com/NVIDIA/cuda-python/issues/1804 + # cuGraphGetEdges previously returned CUgraphEdgeData wrappers backed by + # a scratch buffer that was freed before the call returned, leaving the + # wrappers pointing at freed memory. Ensure the returned objects remain + # readable after the call and after subsequent allocations. + err, graph = cuda.cuGraphCreate(0) + assert err == cuda.CUresult.CUDA_SUCCESS + try: + err, n0 = cuda.cuGraphAddEmptyNode(graph, None, 0) + assert err == cuda.CUresult.CUDA_SUCCESS + err, n1 = cuda.cuGraphAddEmptyNode(graph, [n0], 1) + assert err == cuda.CUresult.CUDA_SUCCESS + err, n2 = cuda.cuGraphAddEmptyNode(graph, [n0, n1], 2) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, _, _, _, num_edges = cuda.cuGraphGetEdges(graph) + assert err == cuda.CUresult.CUDA_SUCCESS + assert num_edges == 3 + err, from_nodes, to_nodes, edge_data, num_edges = cuda.cuGraphGetEdges(graph, num_edges) + assert err == cuda.CUresult.CUDA_SUCCESS + assert len(edge_data) == num_edges == 3 + + # Stir the heap to make a use-after-free more likely to surface. + for _ in range(64): + err, _, _, _, _ = cuda.cuGraphGetEdges(graph, num_edges) + assert err == cuda.CUresult.CUDA_SUCCESS + err, _, _, _ = cuda.cuGraphNodeGetDependencies(n1, 1) + assert err == cuda.CUresult.CUDA_SUCCESS + + # Each wrapper must still own its data. + for ed in edge_data: + assert ed.from_port == 0 + assert ed.to_port == 0 + assert int(ed.type) == 0 + assert ed.reserved == b"\x00" * 5 + finally: + (err,) = cuda.cuGraphDestroy(graph) + assert err == cuda.CUresult.CUDA_SUCCESS + + +def test_cuGraphNodeGetDependencies_edgeData_outlives_call(device, ctx): + # Companion regression test for #1804 covering the dependency-query path. + err, graph = cuda.cuGraphCreate(0) + assert err == cuda.CUresult.CUDA_SUCCESS + try: + err, n0 = cuda.cuGraphAddEmptyNode(graph, None, 0) + assert err == cuda.CUresult.CUDA_SUCCESS + err, n1 = cuda.cuGraphAddEmptyNode(graph, [n0], 1) + assert err == cuda.CUresult.CUDA_SUCCESS + + err, _, _, num_deps = cuda.cuGraphNodeGetDependencies(n1) + assert err == cuda.CUresult.CUDA_SUCCESS + assert num_deps == 1 + err, deps, edge_data, num_deps = cuda.cuGraphNodeGetDependencies(n1, num_deps) + assert err == cuda.CUresult.CUDA_SUCCESS + assert len(edge_data) == num_deps == 1 + + err, _, _, num_dependents = cuda.cuGraphNodeGetDependentNodes(n0) + assert err == cuda.CUresult.CUDA_SUCCESS + assert num_dependents == 1 + err, dependents, dep_edge_data, num_dependents = cuda.cuGraphNodeGetDependentNodes(n0, num_dependents) + assert err == cuda.CUresult.CUDA_SUCCESS + assert len(dep_edge_data) == num_dependents == 1 + + for _ in range(64): + err, _, _, _ = cuda.cuGraphNodeGetDependencies(n1, num_deps) + assert err == cuda.CUresult.CUDA_SUCCESS + err, _, _, _ = cuda.cuGraphNodeGetDependentNodes(n0, num_dependents) + assert err == cuda.CUresult.CUDA_SUCCESS + + for ed in edge_data + dep_edge_data: + assert ed.from_port == 0 + assert ed.to_port == 0 + assert int(ed.type) == 0 + assert ed.reserved == b"\x00" * 5 + finally: + (err,) = cuda.cuGraphDestroy(graph) + assert err == cuda.CUresult.CUDA_SUCCESS + + @pytest.mark.skipif( driverVersionLessThan(13010) or not supportsCudaAPI("cuGraphNodeGetLocalId"), reason="Requires CUDA 13.1+", diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py index 144d7e75b1..91f7797fd4 100644 --- a/cuda_bindings/tests/test_cudart.py +++ b/cuda_bindings/tests/test_cudart.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE import ctypes @@ -257,6 +257,89 @@ def test_cudart_graphs(): assertSuccess(err) +def test_cudart_cudaGraphGetEdges_edgeData_outlives_call(): + # Regression test for https://github.com/NVIDIA/cuda-python/issues/1804 + # cudaGraphGetEdges previously returned cudaGraphEdgeData wrappers backed + # by a scratch buffer that was freed before the call returned, leaving + # the wrappers pointing at freed memory. Ensure the returned objects + # remain readable after the call and after subsequent allocations. + err, graph = cudart.cudaGraphCreate(0) + assertSuccess(err) + try: + err, n0 = cudart.cudaGraphAddEmptyNode(graph, None, 0) + assertSuccess(err) + err, n1 = cudart.cudaGraphAddEmptyNode(graph, [n0], 1) + assertSuccess(err) + err, n2 = cudart.cudaGraphAddEmptyNode(graph, [n0, n1], 2) + assertSuccess(err) + + err, _, _, _, num_edges = cudart.cudaGraphGetEdges(graph) + assertSuccess(err) + assert num_edges == 3 + err, from_nodes, to_nodes, edge_data, num_edges = cudart.cudaGraphGetEdges(graph, num_edges) + assertSuccess(err) + assert len(edge_data) == num_edges == 3 + + # Stir the heap to make a use-after-free more likely to surface + # by reallocating the same-sized scratch buffer many times. + for _ in range(64): + err, _, _, _, _ = cudart.cudaGraphGetEdges(graph, num_edges) + assertSuccess(err) + + # Each wrapper must still own its data. Default-edge values are zero; + # if the wrapper were holding a dangling pointer, attribute access + # would be undefined behavior. We at minimum require it to not crash + # and to report the documented defaults. + for ed in edge_data: + assert ed.from_port == 0 + assert ed.to_port == 0 + assert int(ed.type) == 0 + assert ed.reserved == b"\x00" * 5 + finally: + (err,) = cudart.cudaGraphDestroy(graph) + assertSuccess(err) + + +def test_cudart_cudaGraphNodeGetDependencies_edgeData_outlives_call(): + # Companion regression test for #1804 covering the dependency-query path. + err, graph = cudart.cudaGraphCreate(0) + assertSuccess(err) + try: + err, n0 = cudart.cudaGraphAddEmptyNode(graph, None, 0) + assertSuccess(err) + err, n1 = cudart.cudaGraphAddEmptyNode(graph, [n0], 1) + assertSuccess(err) + + err, _, _, num_deps = cudart.cudaGraphNodeGetDependencies(n1) + assertSuccess(err) + assert num_deps == 1 + err, deps, edge_data, num_deps = cudart.cudaGraphNodeGetDependencies(n1, num_deps) + assertSuccess(err) + assert len(edge_data) == num_deps == 1 + + err, _, _, num_dependents = cudart.cudaGraphNodeGetDependentNodes(n0) + assertSuccess(err) + assert num_dependents == 1 + err, dependents, dep_edge_data, num_dependents = cudart.cudaGraphNodeGetDependentNodes(n0, num_dependents) + assertSuccess(err) + assert len(dep_edge_data) == num_dependents == 1 + + for _ in range(64): + err, _, _, _ = cudart.cudaGraphNodeGetDependencies(n1, num_deps) + assertSuccess(err) + err, _, _, _ = cudart.cudaGraphNodeGetDependentNodes(n0, num_dependents) + assertSuccess(err) + + for ed in edge_data + dep_edge_data: + assert ed.from_port == 0 + assert ed.to_port == 0 + assert int(ed.type) == 0 + assert ed.reserved == b"\x00" * 5 + finally: + (err,) = cudart.cudaGraphDestroy(graph) + assertSuccess(err) + + def test_cudart_list_access(): err, prop = cudart.cudaGetDeviceProperties(0) prop.name = prop.name + b" " * (256 - len(prop.name))