Skip to content
Open
6 changes: 4 additions & 2 deletions cuda_core/cuda/core/_memory/_device_memory_resource.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -7,7 +7,9 @@ from cuda.core._memory._ipc cimport IPCDataForMR


cdef class DeviceMemoryResource(_MemPool):
pass
cdef:
int _dev_id
object _peer_accessible_by


cpdef DMR_mempool_get_access(DeviceMemoryResource, int)
144 changes: 121 additions & 23 deletions cuda_core/cuda/core/_memory/_device_memory_resource.pyx
Original file line number Diff line number Diff line change
@@ -1,25 +1,31 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from cuda.bindings cimport cydriver
from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
from cuda.core._memory._memory_pool cimport (
_MemPool, MP_init_create_pool, MP_raise_release_threshold,
)
from cuda.core._memory cimport _ipc
from cuda.core._memory._ipc cimport IPCAllocationHandle
from cuda.core._resource_handles cimport (
as_cu,
get_device_mempool,
)
from cuda.core._utils.cuda_utils cimport (
check_or_create_options,
HANDLE_RETURN,
)
from cpython.mem cimport PyMem_Malloc, PyMem_Free

from dataclasses import dataclass
import multiprocessing
import platform # no-cython-lint
import uuid

from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
from cuda.core._resource_handles cimport as_cu

__all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions']

Expand Down Expand Up @@ -122,27 +128,12 @@ cdef class DeviceMemoryResource(_MemPool):
associated MMR.
"""

def __init__(self, device_id: Device | int, options=None):
from .._device import Device
cdef int dev_id = Device(device_id).device_id
cdef DeviceMemoryResourceOptions opts = check_or_create_options(
DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
keep_none=True
)
cdef _MemPoolOptions opts_base = _MemPoolOptions()

cdef bint ipc_enabled = False
if opts:
ipc_enabled = opts.ipc_enabled
if ipc_enabled and not _ipc.is_supported():
raise RuntimeError("IPC is not available on {platform.system()}")
opts_base._max_size = opts.max_size
opts_base._use_current = False
opts_base._ipc_enabled = ipc_enabled
opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
def __cinit__(self, *args, **kwargs):
    # __cinit__ runs before __init__ and cannot be skipped, so these
    # sentinels guarantee the attributes are valid even if __init__
    # raises (e.g. during teardown paths that read them).
    self._dev_id = cydriver.CU_DEVICE_INVALID
    self._peer_accessible_by = ()

super().__init__(dev_id, opts_base)
def __init__(self, device_id: Device | int, options=None):
    # All construction logic lives in the module-level _DMR_init helper.
    _DMR_init(self, device_id, options)

def __reduce__(self):
    # Pickle support: rebuild from the registry, keyed by this pool's UUID,
    # rather than serializing driver state.
    return DeviceMemoryResource.from_registry, (self.uuid,)
Expand Down Expand Up @@ -215,6 +206,37 @@ cdef class DeviceMemoryResource(_MemPool):
raise RuntimeError("Memory resource is not IPC-enabled")
return self._ipc_data._alloc_handle

@property
def device_id(self) -> int:
    """The associated device ordinal."""
    # _dev_id is CU_DEVICE_INVALID (set in __cinit__) until _DMR_init runs.
    return self._dev_id

@property
def peer_accessible_by(self):
    """
    Get or set the devices that can access allocations from this memory
    pool. Access can be modified at any time and affects all allocations
    from this memory pool.

    Returns a tuple of sorted device IDs that currently have peer access to
    allocations from this memory pool.

    When setting, accepts a sequence of Device objects or device IDs.
    Setting to an empty sequence revokes all peer access.

    Examples
    --------
    >>> dmr = DeviceMemoryResource(0)
    >>> dmr.peer_accessible_by = [1] # Grant access to device 1
    >>> assert dmr.peer_accessible_by == (1,)
    >>> dmr.peer_accessible_by = [] # Revoke access
    """
    # Cached on the instance; refreshed by the setter only after the
    # driver call succeeds, so it mirrors the applied access state.
    return self._peer_accessible_by

@peer_accessible_by.setter
def peer_accessible_by(self, devices):
    # Delegate to the module-level helper, which validates peers, applies
    # the delta via the driver, and updates the cached tuple.
    _DMR_set_peer_accessible_by(self, devices)

@property
def is_device_accessible(self) -> bool:
"""Return True. This memory resource provides device-accessible buffers."""
Expand All @@ -226,6 +248,82 @@ cdef class DeviceMemoryResource(_MemPool):
return False


cdef inline _DMR_set_peer_accessible_by(DeviceMemoryResource self, devices):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like something that should almost certainly never be invoked in __dealloc__. It's reading from and writing to the Python object whose __dealloc__ method is being called.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, yeah this was a mess. I removed all peer-access-clearing logic from Python. The real problem was that certain tests were using the default memory pool, which is persistent and should keep its peer access settings. Those tests incorrectly assumed the beginning state should be "no peer access." I updated those to simply use fresh pools.

    # Apply a new peer-access set for this pool: compute the delta against
    # the cached self._peer_accessible_by, then issue a single
    # cuMemPoolSetAccess call covering both grants and revocations.
    from .._device import Device

    # Normalize inputs to device ordinals; the pool's own device never needs
    # an explicit peer-access entry.
    cdef set[int] target_ids = {Device(dev).device_id for dev in devices}
    target_ids.discard(self._dev_id)
    this_dev = Device(self._dev_id)
    # Validate all requested peers up front so we fail before touching
    # driver state.
    cdef list bad = [dev for dev in target_ids if not this_dev.can_access_peer(dev)]
    if bad:
        raise ValueError(f"Device {self._dev_id} cannot access peer(s): {', '.join(map(str, bad))}")
    cdef set[int] cur_ids = set(self._peer_accessible_by)
    cdef set[int] to_add = target_ids - cur_ids
    cdef set[int] to_rm = cur_ids - target_ids
    cdef size_t count = len(to_add) + len(to_rm)
    cdef cydriver.CUmemAccessDesc* access_desc = NULL
    cdef size_t i = 0

    if count > 0:
        # One contiguous C array of descriptors: grants first, then
        # revocations; order within the array does not matter to the driver.
        access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
        if access_desc == NULL:
            raise MemoryError("Failed to allocate memory for access descriptors")

        try:
            for dev_id in to_add:
                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                access_desc[i].location.id = dev_id
                i += 1

            for dev_id in to_rm:
                # PROT_NONE revokes access for the given device.
                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
                access_desc[i].location.id = dev_id
                i += 1

            with nogil:
                HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
        finally:
            # Always release the C buffer, even if the driver call raises.
            if access_desc != NULL:
                PyMem_Free(access_desc)

    # Cache the applied set. BUG FIX: the property docstring promises a
    # tuple of *sorted* device IDs, but tuple(set) iteration order is not
    # guaranteed sorted — sort explicitly.
    self._peer_accessible_by = tuple(sorted(target_ids))


cdef inline _DMR_init(DeviceMemoryResource self, device_id, options):
    # Shared initializer for DeviceMemoryResource.__init__.
    #
    # With no options: bind to the device's default pool (not owned by this
    # object). With options: create a fresh device-local pool owned by this
    # object, configured from the options.
    from .._device import Device
    cdef int dev_id = Device(device_id).device_id
    cdef DeviceMemoryResourceOptions opts = check_or_create_options(
        DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
        keep_none=True
    )
    cdef bint ipc_enabled = False
    cdef size_t max_size = 0

    self._dev_id = dev_id

    if opts is not None:
        ipc_enabled = opts.ipc_enabled
        # IPC availability is platform-dependent; _ipc.is_supported() holds
        # the actual constraint — NOTE(review): confirm exact platforms there.
        if ipc_enabled and not _ipc.is_supported():
            raise RuntimeError(f"IPC is not available on {platform.system()}")
        max_size = opts.max_size

    if opts is None:
        # Default pool path: reuse the driver's per-device pool; mark it
        # unowned so teardown does not destroy it.
        self._h_pool = get_device_mempool(dev_id)
        self._mempool_owned = False
        MP_raise_release_threshold(self)
    else:
        # Owned pool path: create a new device-located, pinned-allocation
        # pool with the requested IPC and max-size settings.
        MP_init_create_pool(
            self,
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
            dev_id,
            cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
            ipc_enabled,
            max_size,
        )


# Note: this is referenced in instructions to debug nvbug 5698116.
cpdef DMR_mempool_get_access(DeviceMemoryResource dmr, int device_id):
"""
Expand Down
6 changes: 4 additions & 2 deletions cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

from cuda.core._memory._memory_pool cimport _MemPool


cdef class ManagedMemoryResource(_MemPool):
pass
cdef:
str _pref_loc_type
int _pref_loc_id
Loading