From 069bdaabb11173dddbd564e7b7998c409255a3ff Mon Sep 17 00:00:00 2001
From: Andy Jost <ajost@nvidia.com>
Date: Thu, 26 Feb 2026 16:32:56 -0800
Subject: [PATCH 1/4] Refactor _MemPool hierarchy: separate shared pool
 machinery from device-specific concerns

Move _dev_id, device_id, and peer_accessible_by from _MemPool into
DeviceMemoryResource. Eliminate _MemPoolOptions and refactor pool
initialization into freestanding cdef functions (MP_init_create_pool,
MP_init_current_pool, MP_raise_release_threshold) for cross-module
visibility. Extract __init__ bodies into inline cdef helpers (_DMR_init,
_PMR_init, _MMR_init) for consistency and shorter class definitions.

Implement device_id as -1 for PinnedMemoryResource and
ManagedMemoryResource since they are not device-bound.

Made-with: Cursor
---
 .../core/_memory/_device_memory_resource.pxd  |   6 +-
 .../core/_memory/_device_memory_resource.pyx  | 158 +++++++++--
 .../core/_memory/_managed_memory_resource.pxd |   2 +-
 .../core/_memory/_managed_memory_resource.pyx |  86 +++---
 cuda_core/cuda/core/_memory/_memory_pool.pxd  |  31 +-
 cuda_core/cuda/core/_memory/_memory_pool.pyx  | 268 +++++-------------
 .../core/_memory/_pinned_memory_resource.pyx  | 195 +++++++------
 cuda_core/tests/test_memory.py                |   2 +-
 8 files changed, 392 insertions(+), 356 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pxd b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd
index c293d72750..a7f3bfd958 100644
--- a/cuda_core/cuda/core/_memory/_device_memory_resource.pxd
+++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -7,7 +7,9 @@ from cuda.core._memory._ipc cimport IPCDataForMR
 
 
 cdef class DeviceMemoryResource(_MemPool):
-    pass
+    cdef:
+        int _dev_id
+        object _peer_accessible_by
 
 
 cpdef DMR_mempool_get_access(DeviceMemoryResource, int)
diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
index 78a49d3e44..09aa482234 100644
--- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
@@ -1,17 +1,24 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
 from __future__ import annotations
 
 from cuda.bindings cimport cydriver
-from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
+from cuda.core._memory._memory_pool cimport (
+    _MemPool, MP_init_create_pool, MP_raise_release_threshold,
+)
 from cuda.core._memory cimport _ipc
 from cuda.core._memory._ipc cimport IPCAllocationHandle
+from cuda.core._resource_handles cimport (
+    as_cu,
+    get_device_mempool,
+)
 from cuda.core._utils.cuda_utils cimport (
     check_or_create_options,
     HANDLE_RETURN,
 )
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
 
 from dataclasses import dataclass
 import multiprocessing
@@ -19,7 +26,6 @@ import platform  # no-cython-lint
 import uuid
 
 from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
-from cuda.core._resource_handles cimport as_cu
 
 __all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions']
 
@@ -122,27 +128,26 @@ cdef class DeviceMemoryResource(_MemPool):
     associated MMR.
     """
 
-    def __init__(self, device_id: Device | int, options=None):
-        from .._device import Device
-        cdef int dev_id = Device(device_id).device_id
-        cdef DeviceMemoryResourceOptions opts = check_or_create_options(
-            DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
-            keep_none=True
-        )
-        cdef _MemPoolOptions opts_base = _MemPoolOptions()
-
-        cdef bint ipc_enabled = False
-        if opts:
-            ipc_enabled = opts.ipc_enabled
-            if ipc_enabled and not _ipc.is_supported():
-                raise RuntimeError("IPC is not available on {platform.system()}")
-            opts_base._max_size = opts.max_size
-            opts_base._use_current = False
-        opts_base._ipc_enabled = ipc_enabled
-        opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-        opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
+    def __cinit__(self, *args, **kwargs):
+        self._dev_id = cydriver.CU_DEVICE_INVALID
+        self._peer_accessible_by = ()
 
-        super().__init__(dev_id, opts_base)
+    def __init__(self, device_id: Device | int, options=None):
+        _DMR_init(self, device_id, options)
+
+    def __dealloc__(self):
+        try:
+            self.close()  # revokes any peer access, then resets the pool handle
+        except Exception:
+            pass  # best-effort: errors raised by close() are swallowed during deallocation
+
+    def close(self):
+        """Close the memory resource, revoking peer access before destruction."""
+        # nvbug 5698116: clear peer access before pool destruction; also
+        # needed for non-owned (default) pools to undo modifications.
+        if self._peer_accessible_by:
+            _DMR_set_peer_accessible_by(self, [])
+        super().close()
 
     def __reduce__(self):
         return DeviceMemoryResource.from_registry, (self.uuid,)
@@ -215,6 +220,37 @@ cdef class DeviceMemoryResource(_MemPool):
             raise RuntimeError("Memory resource is not IPC-enabled")
         return self._ipc_data._alloc_handle
 
+    @property
+    def device_id(self) -> int:
+        """The associated device ordinal."""
+        return self._dev_id
+
+    @property
+    def peer_accessible_by(self):
+        """
+        Get or set the devices that can access allocations from this memory
+        pool. Access can be modified at any time and affects all allocations
+        from this memory pool.
+
+        Returns a tuple of sorted device IDs that currently have peer access to
+        allocations from this memory pool.
+
+        When setting, accepts a sequence of Device objects or device IDs.
+        Setting to an empty sequence revokes all peer access.
+
+        Examples
+        --------
+        >>> dmr = DeviceMemoryResource(0)
+        >>> dmr.peer_accessible_by = [1]  # Grant access to device 1
+        >>> assert dmr.peer_accessible_by == (1,)
+        >>> dmr.peer_accessible_by = []  # Revoke access
+        """
+        return self._peer_accessible_by
+
+    @peer_accessible_by.setter
+    def peer_accessible_by(self, devices):
+        _DMR_set_peer_accessible_by(self, devices)
+
     @property
     def is_device_accessible(self) -> bool:
         """Return True. This memory resource provides device-accessible buffers."""
@@ -226,6 +262,82 @@ cdef class DeviceMemoryResource(_MemPool):
         return False
 
 
+cdef inline _DMR_set_peer_accessible_by(DeviceMemoryResource self, devices):
+    """Make ``devices`` the pool's peer set; raise ValueError for peers this device cannot access."""
+    from .._device import Device
+    cdef set[int] target_ids = {Device(dev).device_id for dev in devices}
+    target_ids.discard(self._dev_id)  # the owning device is never listed as its own peer
+    this_dev = Device(self._dev_id)
+    cdef list bad = [dev for dev in target_ids if not this_dev.can_access_peer(dev)]
+    if bad:
+        raise ValueError(f"Device {self._dev_id} cannot access peer(s): {', '.join(map(str, bad))}")
+    cdef set[int] cur_ids = set(self._peer_accessible_by)
+    cdef set[int] to_add = target_ids - cur_ids
+    cdef set[int] to_rm = cur_ids - target_ids
+    cdef size_t count = len(to_add) + len(to_rm)  # one access descriptor per change
+    cdef cydriver.CUmemAccessDesc* access_desc = NULL
+    cdef size_t i = 0
+
+    if count > 0:
+        access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
+        if access_desc == NULL:
+            raise MemoryError("Failed to allocate memory for access descriptors")
+
+        try:
+            for dev_id in to_add:
+                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
+                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
+                access_desc[i].location.id = dev_id
+                i += 1
+
+            for dev_id in to_rm:
+                access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
+                access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
+                access_desc[i].location.id = dev_id
+                i += 1
+
+            with nogil:
+                HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
+        finally:
+            if access_desc != NULL:
+                PyMem_Free(access_desc)
+
+        self._peer_accessible_by = tuple(sorted(target_ids))  # sorted, as the property documents
+
+
+cdef inline _DMR_init(DeviceMemoryResource self, device_id, options):
+    from .._device import Device
+    cdef int dev_id = Device(device_id).device_id  # device_id may be a Device or an int ordinal
+    cdef DeviceMemoryResourceOptions opts = check_or_create_options(
+        DeviceMemoryResourceOptions, options, "DeviceMemoryResource options",
+        keep_none=True
+    )
+    cdef bint ipc_enabled = False
+    cdef size_t max_size = 0
+    # Record the device ordinal before any pool is attached or created.
+    self._dev_id = dev_id
+
+    if opts is not None:
+        ipc_enabled = opts.ipc_enabled
+        if ipc_enabled and not _ipc.is_supported():
+            raise RuntimeError(f"IPC is not available on {platform.system()}")
+        max_size = opts.max_size
+    # No options: use the pool returned by get_device_mempool (not owned). Otherwise create a new pool.
+    if opts is None:
+        self._h_pool = get_device_mempool(dev_id)
+        self._mempool_owned = False
+        MP_raise_release_threshold(self)  # only changes the threshold if it is currently 0
+    else:
+        MP_init_create_pool(
+            self,
+            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
+            dev_id,
+            cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
+            ipc_enabled,
+            max_size,
+        )
+
+
 # Note: this is referenced in instructions to debug nvbug 5698116.
 cpdef DMR_mempool_get_access(DeviceMemoryResource dmr, int device_id):
     """
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd b/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
index 46e00cd4cb..5a73a57ee9 100644
--- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
+++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
index a268520e55..64f523087c 100644
--- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
@@ -6,7 +6,7 @@ from __future__ import annotations
 
 from cuda.bindings cimport cydriver
 
-from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
+from cuda.core._memory._memory_pool cimport _MemPool, MP_init_create_pool, MP_init_current_pool
 from cuda.core._utils.cuda_utils cimport (
     HANDLE_RETURN,
     check_or_create_options,
@@ -64,40 +64,12 @@ cdef class ManagedMemoryResource(_MemPool):
     """
 
     def __init__(self, options=None):
-        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
-            ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
-            keep_none=True
-        )
-        cdef _MemPoolOptions opts_base = _MemPoolOptions()
-
-        cdef int device_id = -1
-        cdef object preferred_location = None
-        if opts:
-            preferred_location = opts.preferred_location
-            if preferred_location is not None:
-                device_id = preferred_location
-            opts_base._use_current = False
-
-        opts_base._ipc_enabled = False  # IPC not supported for managed memory pools
-
-        IF CUDA_CORE_BUILD_MAJOR >= 13:
-            # Set location based on preferred_location
-            if preferred_location is None:
-                # Let the driver decide
-                opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE
-            elif device_id == -1:
-                # CPU/host preference
-                opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
-            else:
-                # Device preference
-                opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-
-            opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED
-
-            super().__init__(device_id, opts_base)
-            _check_concurrent_managed_access()
-        ELSE:
-            raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
+        _MMR_init(self, options)
+
+    @property
+    def device_id(self) -> int:
+        """Return -1. Managed memory migrates automatically and is not tied to a specific device."""
+        return -1
 
     @property
     def is_device_accessible(self) -> bool:
@@ -110,6 +82,50 @@ cdef class ManagedMemoryResource(_MemPool):
         return True
 
 
+cdef inline _MMR_init(ManagedMemoryResource self, options):
+    cdef ManagedMemoryResourceOptions opts = check_or_create_options(
+        ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
+        keep_none=True
+    )
+    cdef int location_id = -1
+    cdef object preferred_location = None
+    cdef cydriver.CUmemLocationType loc_type
+    # location_id stays -1 unless the options carry an explicit preferred_location.
+    if opts is not None:
+        preferred_location = opts.preferred_location
+        if preferred_location is not None:
+            location_id = preferred_location
+
+    IF CUDA_CORE_BUILD_MAJOR >= 13:
+        if preferred_location is None:
+            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE  # let the driver decide
+        elif location_id == -1:
+            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST  # CPU/host preference
+        else:
+            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE  # device preference
+        # No options: reference the driver's current pool. Otherwise create a new pool.
+        if opts is None:
+            MP_init_current_pool(
+                self,
+                loc_type,
+                location_id,
+                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
+            )
+        else:
+            MP_init_create_pool(
+                self,
+                loc_type,
+                location_id,
+                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
+                False,  # ipc_enabled: IPC not supported for managed memory pools
+                0,  # max_size: managed memory does not support maxSize as of CUDA 13.0
+            )
+
+        _check_concurrent_managed_access()
+    ELSE:
+        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
+
+
 cdef bint _concurrent_access_warned = False
 cdef object _concurrent_access_lock = threading.Lock()
 
diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pxd b/cuda_core/cuda/core/_memory/_memory_pool.pxd
index a8838bf9dc..45062826e4 100644
--- a/cuda_core/cuda/core/_memory/_memory_pool.pxd
+++ b/cuda_core/cuda/core/_memory/_memory_pool.pxd
@@ -10,15 +10,32 @@ from cuda.core._resource_handles cimport MemoryPoolHandle
 
 cdef class _MemPool(MemoryResource):
     cdef:
-        int                   _dev_id
         MemoryPoolHandle      _h_pool
         bint                  _mempool_owned
         IPCDataForMR          _ipc_data
         object                _attributes
-        object                _peer_accessible_by
         object                __weakref__
 
 
+cdef int MP_init_create_pool(
+    _MemPool self,
+    cydriver.CUmemLocationType loc_type,
+    int loc_id,
+    cydriver.CUmemAllocationType alloc_type,
+    bint ipc_enabled,
+    size_t max_size,
+) except? -1
+
+cdef int MP_init_current_pool(
+    _MemPool self,
+    cydriver.CUmemLocationType loc_type,
+    int loc_id,
+    cydriver.CUmemAllocationType alloc_type,
+) except? -1
+
+cdef int MP_raise_release_threshold(_MemPool self) except? -1
+
+
 cdef class _MemPoolAttributes:
     cdef:
         MemoryPoolHandle _h_pool
@@ -27,13 +44,3 @@ cdef class _MemPoolAttributes:
     cdef _MemPoolAttributes _init(MemoryPoolHandle h_pool)
 
     cdef int _getattribute(self, cydriver.CUmemPool_attribute attr_enum, void* value) except? -1
-
-
-cdef class _MemPoolOptions:
-
-    cdef:
-        bint _ipc_enabled
-        size_t _max_size
-        cydriver.CUmemLocationType _location
-        cydriver.CUmemAllocationType _type
-        bint _use_current
diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx
index 1e9f5116c1..a37ea17ab3 100644
--- a/cuda_core/cuda/core/_memory/_memory_pool.pyx
+++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx
@@ -7,7 +7,6 @@ from __future__ import annotations
 from libc.limits cimport ULLONG_MAX
 from libc.stdint cimport uintptr_t
 from libc.string cimport memset
-from cpython.mem cimport PyMem_Malloc, PyMem_Free
 
 from cuda.bindings cimport cydriver
 from cuda.core._memory._buffer cimport Buffer, Buffer_from_deviceptr_handle, MemoryResource
@@ -18,7 +17,6 @@ from cuda.core._resource_handles cimport (
     DevicePtrHandle,
     create_mempool_handle,
     create_mempool_handle_ref,
-    get_device_mempool,
     deviceptr_alloc_from_pool,
     as_cu,
     as_py,
@@ -28,20 +26,6 @@ from cuda.core._utils.cuda_utils cimport (
     HANDLE_RETURN,
 )
 
-import platform  # no-cython-lint
-
-from cuda.core._utils.cuda_utils import driver
-
-
-cdef class _MemPoolOptions:
-
-    def __cinit__(self):
-        self._ipc_enabled = False
-        self._max_size = 0
-        self._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_INVALID
-        self._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_INVALID
-        self._use_current = True
-
 
 cdef class _MemPoolAttributes:
     """Provides access to memory pool attributes."""
@@ -126,24 +110,14 @@ cdef class _MemPoolAttributes:
 cdef class _MemPool(MemoryResource):
 
     def __cinit__(self):
-        self._dev_id = cydriver.CU_DEVICE_INVALID
+        # Note: subclasses use MP_init_create_pool or MP_init_current_pool to initialize.
         self._mempool_owned = False
         self._ipc_data = None
         self._attributes = None
-        self._peer_accessible_by = ()
-
-    def __init__(self, int device_id, _MemPoolOptions opts):
-        if opts._use_current:
-            _MP_init_current(self, device_id, opts)
-        else:
-            _MP_init_create(self, device_id, opts)
-
-    def __dealloc__(self):
-        _MP_close(self)
 
     def close(self):
         """
-        Close the device memory resource and destroy the associated memory pool
+        Close the memory resource and destroy the associated memory pool
         if owned.
         """
         _MP_close(self)
@@ -194,11 +168,6 @@ cdef class _MemPool(MemoryResource):
             self._attributes = _MemPoolAttributes._init(self._h_pool)
         return self._attributes
 
-    @property
-    def device_id(self) -> int:
-        """The associated device ordinal."""
-        return self._dev_id
-
     @property
     def handle(self) -> object:
         """Handle to the underlying memory pool."""
@@ -209,73 +178,6 @@ cdef class _MemPool(MemoryResource):
         """Whether the memory resource handle is owned. If False, ``close`` has no effect."""
         return self._mempool_owned
 
-    @property
-    def peer_accessible_by(self):
-        """
-        Get or set the devices that can access allocations from this memory
-        pool. Access can be modified at any time and affects all allocations
-        from this memory pool.
-
-        Returns a tuple of sorted device IDs that currently have peer access to
-        allocations from this memory pool.
-
-        When setting, accepts a sequence of Device objects or device IDs.
-        Setting to an empty sequence revokes all peer access.
-
-        Examples
-        --------
-        >>> dmr = DeviceMemoryResource(0)
-        >>> dmr.peer_accessible_by = [1]  # Grant access to device 1
-        >>> assert dmr.peer_accessible_by == (1,)
-        >>> dmr.peer_accessible_by = []  # Revoke access
-        """
-        return self._peer_accessible_by
-
-    @peer_accessible_by.setter
-    def peer_accessible_by(self, devices):
-        """Set which devices can access this memory pool."""
-        from .._device import Device
-
-        # Convert all devices to device IDs
-        cdef set[int] target_ids = {Device(dev).device_id for dev in devices}
-        target_ids.discard(self._dev_id)  # exclude this device from peer access list
-        this_dev = Device(self._dev_id)
-        cdef list bad = [dev for dev in target_ids if not this_dev.can_access_peer(dev)]
-        if bad:
-            raise ValueError(f"Device {self._dev_id} cannot access peer(s): {', '.join(map(str, bad))}")
-        cdef set[int] cur_ids = set(self._peer_accessible_by)
-        cdef set[int] to_add = target_ids - cur_ids
-        cdef set[int] to_rm = cur_ids - target_ids
-        cdef size_t count = len(to_add) + len(to_rm) # transaction size
-        cdef cydriver.CUmemAccessDesc* access_desc = NULL
-        cdef size_t i = 0
-
-        if count > 0:
-            access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
-            if access_desc == NULL:
-                raise MemoryError("Failed to allocate memory for access descriptors")
-
-            try:
-                for dev_id in to_add:
-                    access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
-                    access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-                    access_desc[i].location.id = dev_id
-                    i += 1
-
-                for dev_id in to_rm:
-                    access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
-                    access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-                    access_desc[i].location.id = dev_id
-                    i += 1
-
-                with nogil:
-                    HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
-            finally:
-                if access_desc != NULL:
-                    PyMem_Free(access_desc)
-
-            self._peer_accessible_by = tuple(target_ids)
-
     @property
     def is_ipc_enabled(self) -> bool:
         """Whether this memory resource has IPC enabled."""
@@ -298,106 +200,90 @@ cdef class _MemPool(MemoryResource):
         return getattr(self._ipc_data, 'uuid', None)
 
 
-# _MemPool Implementation
-# -----------------------
+cdef int MP_init_create_pool(
+    _MemPool self,
+    cydriver.CUmemLocationType loc_type,
+    int loc_id,
+    cydriver.CUmemAllocationType alloc_type,
+    bint ipc_enabled,
+    size_t max_size,
+) except? -1:
+    """Initialize a _MemPool by creating a new memory pool with the given
+    parameters.
 
-cdef int _MP_init_current(_MemPool self, int dev_id, _MemPoolOptions opts) except?-1:
-    # Get the current memory pool.
-    cdef cydriver.cuuint64_t current_threshold
-    cdef cydriver.cuuint64_t max_threshold = ULLONG_MAX
-    cdef cydriver.CUmemLocation loc
-    cdef cydriver.CUmemoryPool pool
+    Sets ``_h_pool`` (owning), ``_mempool_owned``, and ``_ipc_data``.
+    """
+    cdef cydriver.CUmemPoolProps properties
+    memset(&properties, 0, sizeof(cydriver.CUmemPoolProps))
 
-    self._dev_id = dev_id
-    self._mempool_owned = False
+    properties.allocType = alloc_type
+    properties.handleTypes = (
+        _ipc.IPC_HANDLE_TYPE if ipc_enabled
+        else cydriver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE
+    )
+    properties.location.id = loc_id
+    properties.location.type = loc_type
+    properties.maxSize = max_size
 
-    if opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
-            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE:
-        assert dev_id >= 0
-        self._h_pool = get_device_mempool(dev_id)
+    self._mempool_owned = True
+    self._h_pool = create_mempool_handle(properties)
 
-        # Set a higher release threshold to improve performance when there are
-        # no active allocations.  By default, the release threshold is 0, which
-        # means memory is immediately released back to the OS when there are no
-        # active suballocations, causing performance issues.
-        with nogil:
-            HANDLE_RETURN(
-                cydriver.cuMemPoolGetAttribute(
-                    as_cu(self._h_pool),
-                    cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-                    &current_threshold
-                )
-            )
-            if current_threshold == 0:
-                HANDLE_RETURN(cydriver.cuMemPoolSetAttribute(
-                    as_cu(self._h_pool),
-                    cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-                    &max_threshold
-                ))
-    elif opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
-            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST:
-        IF CUDA_CORE_BUILD_MAJOR >= 13:
-            assert dev_id == -1
-            loc.id = dev_id
-            loc.type = opts._location
-            with nogil:
-                HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
-            self._h_pool = create_mempool_handle_ref(pool)
-        ELSE:
-            raise RuntimeError("not supported")
-    elif opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED \
-            and opts._location == cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA:
-        IF CUDA_CORE_BUILD_MAJOR >= 13:
-            assert dev_id == 0
-            loc.id = 0
-            loc.type = opts._location
-            with nogil:
-                HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
-            self._h_pool = create_mempool_handle_ref(pool)
-        ELSE:
-            raise RuntimeError("not supported")
-    else:
-        IF CUDA_CORE_BUILD_MAJOR >= 13:
-            if opts._type == cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED:
-                # Managed memory pools
-                loc.id = dev_id
-                loc.type = opts._location
-                with nogil:
-                    HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, opts._type))
-                self._h_pool = create_mempool_handle_ref(pool)
-            else:
-                assert False
-        ELSE:
-            assert False
+    if ipc_enabled:
+        alloc_handle = _ipc.MP_export_mempool(self)
+        self._ipc_data = _ipc.IPCDataForMR(alloc_handle, False)
 
     return 0
 
 
-cdef int _MP_init_create(_MemPool self, int dev_id, _MemPoolOptions opts) except?-1:
-    cdef cydriver.CUmemPoolProps properties
-    memset(&properties, 0, sizeof(cydriver.CUmemPoolProps))
+cdef int MP_init_current_pool(
+    _MemPool self,
+    cydriver.CUmemLocationType loc_type,
+    int loc_id,
+    cydriver.CUmemAllocationType alloc_type,
+) except? -1:
+    """Initialize a _MemPool by getting the driver's current pool for a
+    location and allocation type.
 
-    cdef bint ipc_enabled = opts._ipc_enabled
-    properties.allocType = opts._type
-    properties.handleTypes = _ipc.IPC_HANDLE_TYPE if ipc_enabled else cydriver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE
-    properties.location.id = dev_id
-    properties.location.type = opts._location
-    # managed memory does not support maxSize as of CUDA 13.0
+    Sets ``_h_pool`` (non-owning) via ``cuMemGetMemPool``.
+    Requires CUDA 13+.
+    """
     IF CUDA_CORE_BUILD_MAJOR >= 13:
-        if properties.allocType != cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED:
-            properties.maxSize = opts._max_size
+        cdef cydriver.CUmemLocation loc
+        cdef cydriver.CUmemoryPool pool
+        loc.id = loc_id
+        loc.type = loc_type
+        with nogil:
+            HANDLE_RETURN(cydriver.cuMemGetMemPool(&pool, &loc, alloc_type))
+        self._h_pool = create_mempool_handle_ref(pool)
+        self._mempool_owned = False
     ELSE:
-        properties.maxSize = opts._max_size
-
-    self._dev_id = dev_id
-    self._mempool_owned = True
+        raise RuntimeError("not supported")
+    return 0
 
-    self._h_pool = create_mempool_handle(properties)
 
-    if ipc_enabled:
-        alloc_handle = _ipc.MP_export_mempool(self)
-        self._ipc_data = _ipc.IPCDataForMR(alloc_handle, False)
+cdef int MP_raise_release_threshold(_MemPool self) except? -1:
+    """Raise the pool's release threshold to ULLONG_MAX if currently zero.
 
+    By default the release threshold is 0, meaning memory is returned to
+    the OS as soon as there are no active suballocations.  Setting it to
+    ULLONG_MAX avoids repeated OS round-trips.
+    """
+    cdef cydriver.cuuint64_t current_threshold
+    cdef cydriver.cuuint64_t max_threshold = ULLONG_MAX
+    with nogil:
+        HANDLE_RETURN(
+            cydriver.cuMemPoolGetAttribute(
+                as_cu(self._h_pool),
+                cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
+                &current_threshold
+            )
+        )
+        if current_threshold == 0:
+            HANDLE_RETURN(cydriver.cuMemPoolSetAttribute(
+                as_cu(self._h_pool),
+                cydriver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
+                &max_threshold
+            ))
     return 0
 
 
@@ -438,17 +324,9 @@ cdef inline _MP_close(_MemPool self):
     if not self._h_pool:
         return
 
-    # This works around nvbug 5698116. When a memory pool handle is recycled
-    # the new handle inherits the peer access state of the previous handle.
-    if self._peer_accessible_by:
-        self.peer_accessible_by = []
-
     # Reset members in declaration order.
-    # The RAII deleter handles nvbug 5698116 workaround (clears peer access)
-    # and calls cuMemPoolDestroy if this is an owning handle.
+    # The RAII deleter calls cuMemPoolDestroy if this is an owning handle.
     self._h_pool.reset()
-    self._dev_id = cydriver.CU_DEVICE_INVALID
     self._mempool_owned = False
     self._ipc_data = None
     self._attributes = None
-    self._peer_accessible_by = ()
diff --git a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
index b2a9db4594..b35bc1ebdf 100644
--- a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
@@ -1,11 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
 from __future__ import annotations
 
 from cuda.bindings cimport cydriver
-from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
+from cuda.core._memory._memory_pool cimport _MemPool, MP_init_create_pool, MP_init_current_pool
 from cuda.core._memory cimport _ipc
 from cuda.core._memory._ipc cimport IPCAllocationHandle
 from cuda.core._utils.cuda_utils cimport (
@@ -24,67 +24,6 @@ import warnings
 
 from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
 
-
-# Cache to ensure NUMA warning is only raised once per process
-cdef bint _numa_warning_shown = False
-cdef object _lock = threading.Lock()
-
-
-def _check_numa_nodes():
-    """Check if system has multiple NUMA nodes and warn if so."""
-    global _numa_warning_shown
-    if _numa_warning_shown:
-        return
-
-    with _lock:
-        if _numa_warning_shown:
-            return
-
-        if platform.system() != "Linux":
-            _numa_warning_shown = True
-            return
-
-        numa_count = None
-
-        # Try /sys filesystem first (most reliable and doesn't require external tools)
-        try:
-            node_path = "/sys/devices/system/node"
-            if os.path.exists(node_path):
-                # Count directories named "node[0-9]+"
-                nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
-                numa_count = len(nodes)
-        except (OSError, PermissionError):
-            pass
-
-        # Fallback to lscpu if /sys check didn't work
-        if numa_count is None:
-            try:
-                result = subprocess.run(
-                    ["lscpu"],
-                    capture_output=True,
-                    text=True,
-                    timeout=1
-                )
-                for line in result.stdout.splitlines():
-                    if line.startswith("NUMA node(s):"):
-                        numa_count = int(line.split(":")[1].strip())
-                        break
-            except (subprocess.SubprocessError, ValueError, FileNotFoundError):
-                pass
-
-        # Warn if multiple NUMA nodes detected
-        if numa_count is not None and numa_count > 1:
-            warnings.warn(
-                f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
-                f"uses location ID 0, which may not work correctly with multiple "
-                f"NUMA nodes.",
-                UserWarning,
-                stacklevel=3
-            )
-
-        _numa_warning_shown = True
-
-
 __all__ = ['PinnedMemoryResource', 'PinnedMemoryResourceOptions']
 
 
@@ -143,30 +82,7 @@ cdef class PinnedMemoryResource(_MemPool):
     """
 
     def __init__(self, options=None):
-        cdef PinnedMemoryResourceOptions opts = check_or_create_options(
-            PinnedMemoryResourceOptions, options, "PinnedMemoryResource options",
-            keep_none=True
-        )
-        cdef _MemPoolOptions opts_base = _MemPoolOptions()
-
-        cdef bint ipc_enabled = False
-        if opts:
-            ipc_enabled = opts.ipc_enabled
-            if ipc_enabled and not _ipc.is_supported():
-                raise RuntimeError(f"IPC is not available on {platform.system()}")
-            if ipc_enabled:
-                # Check for multiple NUMA nodes on Linux
-                _check_numa_nodes()
-            opts_base._max_size = opts.max_size
-            opts_base._use_current = False
-        opts_base._ipc_enabled = ipc_enabled
-        if ipc_enabled:
-            opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA
-        else:
-            opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
-        opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
-
-        super().__init__(0 if ipc_enabled else -1, opts_base)
+        _PMR_init(self, options)
 
     def __reduce__(self):
         return PinnedMemoryResource.from_registry, (self.uuid,)
@@ -239,6 +155,11 @@ cdef class PinnedMemoryResource(_MemPool):
             raise RuntimeError("Memory resource is not IPC-enabled")
         return self._ipc_data._alloc_handle
 
+    @property
+    def device_id(self) -> int:
+        """Return -1. Pinned memory is host memory and is not associated with a specific device."""
+        return -1
+
     @property
     def is_device_accessible(self) -> bool:
         """Return True. This memory resource provides device-accessible buffers."""
@@ -250,6 +171,49 @@ cdef class PinnedMemoryResource(_MemPool):
         return True
 
 
+cdef inline _PMR_init(PinnedMemoryResource self, options):
+    cdef PinnedMemoryResourceOptions opts = check_or_create_options(
+        PinnedMemoryResourceOptions, options, "PinnedMemoryResource options",
+        keep_none=True
+    )
+    cdef bint ipc_enabled = False
+    cdef size_t max_size = 0
+    cdef cydriver.CUmemLocationType loc_type
+    cdef int location_id
+
+    if opts is not None:
+        ipc_enabled = opts.ipc_enabled
+        if ipc_enabled and not _ipc.is_supported():
+            raise RuntimeError(f"IPC is not available on {platform.system()}")
+        if ipc_enabled:
+            _check_numa_nodes()
+        max_size = opts.max_size
+
+    if ipc_enabled:
+        loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA
+        location_id = 0
+    else:
+        loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
+        location_id = -1
+
+    if opts is None:
+        MP_init_current_pool(
+            self,
+            loc_type,
+            location_id,
+            cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
+        )
+    else:
+        MP_init_create_pool(
+            self,
+            loc_type,
+            location_id,
+            cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
+            ipc_enabled,
+            max_size,
+        )
+
+
 def _deep_reduce_pinned_memory_resource(mr):
     check_multiprocessing_start_method()
     alloc_handle = mr.get_allocation_handle()
@@ -257,3 +221,60 @@ def _deep_reduce_pinned_memory_resource(mr):
 
 
 multiprocessing.reduction.register(PinnedMemoryResource, _deep_reduce_pinned_memory_resource)
+
+
+cdef bint _numa_warning_shown = False
+cdef object _numa_lock = threading.Lock()
+
+
+cdef inline _check_numa_nodes():
+    """Check if system has multiple NUMA nodes and warn if so."""
+    global _numa_warning_shown
+    if _numa_warning_shown:
+        return
+
+    with _numa_lock:
+        if _numa_warning_shown:
+            return
+
+        if platform.system() != "Linux":
+            _numa_warning_shown = True
+            return
+
+        numa_count = None
+
+        # Try /sys filesystem first (most reliable and doesn't require external tools)
+        try:
+            node_path = "/sys/devices/system/node"
+            if os.path.exists(node_path):
+                nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
+                numa_count = len(nodes)
+        except (OSError, PermissionError):
+            pass
+
+        # Fallback to lscpu if /sys check didn't work
+        if numa_count is None:
+            try:
+                result = subprocess.run(
+                    ["lscpu"],
+                    capture_output=True,
+                    text=True,
+                    timeout=1
+                )
+                for line in result.stdout.splitlines():
+                    if line.startswith("NUMA node(s):"):
+                        numa_count = int(line.split(":")[1].strip())
+                        break
+            except (subprocess.SubprocessError, ValueError, FileNotFoundError):
+                pass
+
+        if numa_count is not None and numa_count > 1:
+            warnings.warn(
+                f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
+                f"uses location ID 0, which may not work correctly with multiple "
+                f"NUMA nodes.",
+                UserWarning,
+                stacklevel=3
+            )
+
+        _numa_warning_shown = True
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 65230944ad..49c4935f59 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1038,7 +1038,7 @@ def test_pinned_mempool_ipc_basic():
     assert mr.is_ipc_enabled
     assert mr.is_device_accessible
     assert mr.is_host_accessible
-    assert mr.device_id == 0  # IPC-enabled uses location id 0
+    assert mr.device_id == -1  # pinned memory is not device-specific
 
     # Test allocation handle export
     alloc_handle = mr.get_allocation_handle()

From e55a26b0bdb340361acf3a5bb8b896d1bf9e2b27 Mon Sep 17 00:00:00 2001
From: Andy Jost <ajost@nvidia.com>
Date: Thu, 26 Feb 2026 17:10:00 -0800
Subject: [PATCH 2/4] Fix PinnedMemoryResource IPC to derive NUMA ID from
 active device (#1603)

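A PinnedMemoryResource created with ipc_enabled=True hardcoded host NUMA
ID 0, causing failures on multi-NUMA systems where the active device is
attached to a different NUMA node. The NUMA ID is now derived from the
current device's host_numa_id attribute (a RuntimeError is raised if it
cannot be determined), and a new numa_id option allows an explicit
override (a negative value raises ValueError). The resolved value is
exposed through the new PinnedMemoryResource.numa_id property, where -1
means OS-managed placement. Removes the _check_numa_nodes warning
machinery in favor of proper NUMA node selection.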

Made-with: Cursor
---
 .../core/_memory/_pinned_memory_resource.pxd  |   2 +-
 .../core/_memory/_pinned_memory_resource.pyx  | 119 +++++++-----------
 cuda_core/tests/test_memory.py                |  72 ++++++++++-
 3 files changed, 116 insertions(+), 77 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pxd b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pxd
index a8262d9bd8..fcfcfeb346 100644
--- a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pxd
+++ b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pxd
@@ -7,4 +7,4 @@ from cuda.core._memory._ipc cimport IPCDataForMR
 
 
 cdef class PinnedMemoryResource(_MemPool):
-    pass
+    cdef int _numa_id
diff --git a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
index b35bc1ebdf..64ebcc7bc5 100644
--- a/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_pinned_memory_resource.pyx
@@ -15,12 +15,8 @@ from cuda.core._utils.cuda_utils cimport (
 
 from dataclasses import dataclass
 import multiprocessing
-import os
 import platform  # no-cython-lint
-import subprocess
-import threading
 import uuid
-import warnings
 
 from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
 
@@ -41,9 +37,22 @@ cdef class PinnedMemoryResourceOptions:
     max_size : int, optional
         Maximum pool size. When set to 0, defaults to a system-dependent value.
         (Default to 0)
+
+    numa_id : int or None, optional
+        Host NUMA node ID for pool placement. When set to None (the default),
+        the behavior depends on ``ipc_enabled``:
+
+        - ``ipc_enabled=False``: OS-managed placement (location type HOST).
+        - ``ipc_enabled=True``: automatically derived from the current CUDA
+          device's ``host_numa_id`` attribute, requiring an active CUDA
+          context.
+
+        When set to a non-negative integer, that NUMA node is used explicitly
+        regardless of ``ipc_enabled`` (location type HOST_NUMA).
     """
     ipc_enabled : bool = False
     max_size : int = 0
+    numa_id : int | None = None
 
 
 cdef class PinnedMemoryResource(_MemPool):
@@ -71,12 +80,10 @@ cdef class PinnedMemoryResource(_MemPool):
     -----
     To create an IPC-Enabled memory resource (MR) that is capable of sharing
     allocations between processes, specify ``ipc_enabled=True`` in the initializer
-    option. When IPC is enabled, the location type is automatically set to
-    CU_MEM_LOCATION_TYPE_HOST_NUMA instead of CU_MEM_LOCATION_TYPE_HOST,
-    with location ID 0.
-
-    Note: IPC support for pinned memory requires a single NUMA node. A warning
-    is issued if multiple NUMA nodes are detected.
+    option. When IPC is enabled and ``numa_id`` is not specified, the NUMA node
+    is automatically derived from the current CUDA device's ``host_numa_id``
+    attribute, which requires an active CUDA context. If ``numa_id`` is
+    explicitly set, that value is used regardless of ``ipc_enabled``.
 
     See :class:`DeviceMemoryResource` for more details on IPC usage patterns.
     """
@@ -160,6 +167,11 @@ cdef class PinnedMemoryResource(_MemPool):
         """Return -1. Pinned memory is host memory and is not associated with a specific device."""
         return -1
 
+    @property
+    def numa_id(self) -> int:
+        """The host NUMA node ID used for pool placement, or -1 for OS-managed placement."""
+        return self._numa_id
+
     @property
     def is_device_accessible(self) -> bool:
         """Return True. This memory resource provides device-accessible buffers."""
@@ -172,6 +184,8 @@ cdef class PinnedMemoryResource(_MemPool):
 
 
 cdef inline _PMR_init(PinnedMemoryResource self, options):
+    from .._device import Device
+
     cdef PinnedMemoryResourceOptions opts = check_or_create_options(
         PinnedMemoryResourceOptions, options, "PinnedMemoryResource options",
         keep_none=True
@@ -179,35 +193,47 @@ cdef inline _PMR_init(PinnedMemoryResource self, options):
     cdef bint ipc_enabled = False
     cdef size_t max_size = 0
     cdef cydriver.CUmemLocationType loc_type
-    cdef int location_id
+    cdef int numa_id = -1
 
     if opts is not None:
         ipc_enabled = opts.ipc_enabled
         if ipc_enabled and not _ipc.is_supported():
             raise RuntimeError(f"IPC is not available on {platform.system()}")
-        if ipc_enabled:
-            _check_numa_nodes()
         max_size = opts.max_size
 
-    if ipc_enabled:
+        if opts.numa_id is not None:
+            numa_id = opts.numa_id
+            if numa_id < 0:
+                raise ValueError(f"numa_id must be >= 0, got {numa_id}")
+        elif ipc_enabled:
+            dev = Device()
+            numa_id = dev.properties.host_numa_id
+            if numa_id < 0:
+                raise RuntimeError(
+                    "Cannot determine host NUMA ID for IPC-enabled pinned "
+                    "memory pool. The system may not support NUMA, or no "
+                    "CUDA context is active. Set numa_id explicitly or "
+                    "call Device.set_current() first.")
+
+    if numa_id >= 0:
         loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA
-        location_id = 0
     else:
         loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
-        location_id = -1
+
+    self._numa_id = numa_id
 
     if opts is None:
         MP_init_current_pool(
             self,
             loc_type,
-            location_id,
+            numa_id,
             cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
         )
     else:
         MP_init_create_pool(
             self,
             loc_type,
-            location_id,
+            numa_id,
             cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED,
             ipc_enabled,
             max_size,
@@ -221,60 +247,3 @@ def _deep_reduce_pinned_memory_resource(mr):
 
 
 multiprocessing.reduction.register(PinnedMemoryResource, _deep_reduce_pinned_memory_resource)
-
-
-cdef bint _numa_warning_shown = False
-cdef object _numa_lock = threading.Lock()
-
-
-cdef inline _check_numa_nodes():
-    """Check if system has multiple NUMA nodes and warn if so."""
-    global _numa_warning_shown
-    if _numa_warning_shown:
-        return
-
-    with _numa_lock:
-        if _numa_warning_shown:
-            return
-
-        if platform.system() != "Linux":
-            _numa_warning_shown = True
-            return
-
-        numa_count = None
-
-        # Try /sys filesystem first (most reliable and doesn't require external tools)
-        try:
-            node_path = "/sys/devices/system/node"
-            if os.path.exists(node_path):
-                nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
-                numa_count = len(nodes)
-        except (OSError, PermissionError):
-            pass
-
-        # Fallback to lscpu if /sys check didn't work
-        if numa_count is None:
-            try:
-                result = subprocess.run(
-                    ["lscpu"],
-                    capture_output=True,
-                    text=True,
-                    timeout=1
-                )
-                for line in result.stdout.splitlines():
-                    if line.startswith("NUMA node(s):"):
-                        numa_count = int(line.split(":")[1].strip())
-                        break
-            except (subprocess.SubprocessError, ValueError, FileNotFoundError):
-                pass
-
-        if numa_count is not None and numa_count > 1:
-            warnings.warn(
-                f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
-                f"uses location ID 0, which may not work correctly with multiple "
-                f"NUMA nodes.",
-                UserWarning,
-                stacklevel=3
-            )
-
-        _numa_warning_shown = True
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 49c4935f59..8933dcba09 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -1039,6 +1039,7 @@ def test_pinned_mempool_ipc_basic():
     assert mr.is_device_accessible
     assert mr.is_host_accessible
     assert mr.device_id == -1  # pinned memory is not device-specific
+    assert mr.numa_id >= 0  # IPC requires a concrete NUMA node
 
     # Test allocation handle export
     alloc_handle = mr.get_allocation_handle()
@@ -1070,7 +1071,8 @@ def test_pinned_mempool_ipc_errors():
     options = PinnedMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=False)
     mr = PinnedMemoryResource(options)
     assert not mr.is_ipc_enabled
-    assert mr.device_id == -1  # Non-IPC uses location id -1
+    assert mr.device_id == -1
+    assert mr.numa_id == -1  # Non-IPC uses OS-managed placement
 
     buffer = mr.allocate(64)
     ipc_error_msg = "Memory resource is not IPC-enabled"
@@ -1089,6 +1091,74 @@ def test_pinned_mempool_ipc_errors():
     mr.close()
 
 
+def test_pinned_mr_numa_id_default_no_ipc(init_cuda):
+    """numa_id defaults to -1 (OS-managed) when IPC is disabled."""
+    device = Device()
+    skip_if_pinned_memory_unsupported(device)
+
+    mr = PinnedMemoryResource(PinnedMemoryResourceOptions())
+    assert mr.numa_id == -1
+    mr.close()
+
+    mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=False))
+    assert mr.numa_id == -1
+    mr.close()
+
+
+def test_pinned_mr_numa_id_default_with_ipc(init_cuda):
+    """numa_id is derived from the current device when IPC is enabled."""
+    device = Device()
+    skip_if_pinned_memory_unsupported(device)
+
+    if platform.system() == "Windows":
+        pytest.skip("IPC not implemented for Windows")
+    if not supports_ipc_mempool(device):
+        pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
+
+    expected_numa_id = device.properties.host_numa_id
+    if expected_numa_id < 0:
+        pytest.skip("System does not support NUMA")
+
+    mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=True, max_size=POOL_SIZE))
+    assert mr.numa_id == expected_numa_id
+    mr.close()
+
+
+def test_pinned_mr_numa_id_explicit(init_cuda):
+    """Explicit numa_id is used regardless of ipc_enabled."""
+    device = Device()
+    skip_if_pinned_memory_unsupported(device)
+
+    host_numa_id = device.properties.host_numa_id
+    if host_numa_id < 0:
+        pytest.skip("System does not support NUMA")
+
+    mr = PinnedMemoryResource(PinnedMemoryResourceOptions(numa_id=host_numa_id))
+    assert mr.numa_id == host_numa_id
+    mr.close()
+
+    if platform.system() == "Windows":
+        pytest.skip("IPC not implemented for Windows")
+    if not supports_ipc_mempool(device):
+        pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
+
+    mr = PinnedMemoryResource(PinnedMemoryResourceOptions(ipc_enabled=True, numa_id=host_numa_id, max_size=POOL_SIZE))
+    assert mr.numa_id == host_numa_id
+    mr.close()
+
+
+def test_pinned_mr_numa_id_negative_error(init_cuda):
+    """Negative numa_id raises ValueError."""
+    device = Device()
+    skip_if_pinned_memory_unsupported(device)
+
+    with pytest.raises(ValueError, match="numa_id must be >= 0"):
+        PinnedMemoryResource(PinnedMemoryResourceOptions(numa_id=-1))
+
+    with pytest.raises(ValueError, match="numa_id must be >= 0"):
+        PinnedMemoryResource(PinnedMemoryResourceOptions(numa_id=-42))
+
+
 @pytest.mark.parametrize("ipc_enabled", [True, False])
 @pytest.mark.parametrize(
     "property_name,expected_type",

From 29025785a584bf0cdd7c72875d3200796ffcf7f8 Mon Sep 17 00:00:00 2001
From: Andy Jost <ajost@nvidia.com>
Date: Mon, 2 Mar 2026 13:37:59 -0800
Subject: [PATCH 3/4] Add preferred_location_type option and query property to
 ManagedMemoryResource

Extends ManagedMemoryResourceOptions with a preferred_location_type field
("device", "host", "host_numa", or None) enabling NUMA-aware managed memory
pool placement. Adds ManagedMemoryResource.preferred_location property to
query the resolved setting. Fully backwards-compatible: existing code using
preferred_location alone continues to work unchanged.

Made-with: Cursor
---
 .pre-commit-config.yaml                       |    1 +
 .../cuda/bindings/_bindings/cydriver.pxd.in   |    4 +-
 .../cuda/bindings/_bindings/cydriver.pyx.in   |    4 +-
 .../cuda/bindings/_bindings/cynvrtc.pxd.in    |    4 +-
 .../cuda/bindings/_bindings/cynvrtc.pyx.in    |    4 +-
 .../cuda/bindings/_bindings/cyruntime.pxd.in  |    4 +-
 .../cuda/bindings/_bindings/cyruntime.pyx.in  |    4 +-
 .../bindings/_bindings/cyruntime_ptds.pxd.in  |    4 +-
 .../bindings/_bindings/cyruntime_ptds.pyx.in  |    4 +-
 .../cuda/bindings/_internal/_fast_enum.py     |    2 +-
 .../cuda/bindings/_internal/cufile.pxd        |    2 +-
 .../cuda/bindings/_internal/cufile_linux.pyx  |    2 +-
 .../cuda/bindings/_internal/nvjitlink.pxd     |    2 +-
 .../bindings/_internal/nvjitlink_linux.pyx    |    2 +-
 .../bindings/_internal/nvjitlink_windows.pyx  |    2 +-
 .../cuda/bindings/_internal/nvml.pxd          |    2 +-
 .../cuda/bindings/_internal/nvml_linux.pyx    |    2 +-
 .../cuda/bindings/_internal/nvml_windows.pyx  |    2 +-
 .../cuda/bindings/_internal/nvvm.pxd          |    2 +-
 .../cuda/bindings/_internal/nvvm_linux.pyx    |    2 +-
 .../cuda/bindings/_internal/nvvm_windows.pyx  |    2 +-
 cuda_bindings/cuda/bindings/cufile.pxd        |    2 +-
 cuda_bindings/cuda/bindings/cufile.pyx        |  152 ++-
 cuda_bindings/cuda/bindings/cycufile.pxd      |    2 +-
 cuda_bindings/cuda/bindings/cycufile.pyx      |    2 +-
 cuda_bindings/cuda/bindings/cydriver.pxd.in   |    4 +-
 cuda_bindings/cuda/bindings/cydriver.pyx.in   |    4 +-
 cuda_bindings/cuda/bindings/cynvjitlink.pxd   |    2 +-
 cuda_bindings/cuda/bindings/cynvjitlink.pyx   |    2 +-
 cuda_bindings/cuda/bindings/cynvml.pxd        |    2 +-
 cuda_bindings/cuda/bindings/cynvml.pyx        |    2 +-
 cuda_bindings/cuda/bindings/cynvrtc.pxd.in    |    4 +-
 cuda_bindings/cuda/bindings/cynvrtc.pyx.in    |    4 +-
 cuda_bindings/cuda/bindings/cynvvm.pxd        |    2 +-
 cuda_bindings/cuda/bindings/cynvvm.pyx        |    2 +-
 cuda_bindings/cuda/bindings/cyruntime.pxd.in  |    4 +-
 cuda_bindings/cuda/bindings/cyruntime.pyx.in  |    4 +-
 .../cuda/bindings/cyruntime_functions.pxi.in  |    4 +-
 .../cuda/bindings/cyruntime_types.pxi.in      |    4 +-
 cuda_bindings/cuda/bindings/driver.pxd.in     |    2 +-
 cuda_bindings/cuda/bindings/driver.pyx.in     |    2 +-
 cuda_bindings/cuda/bindings/nvjitlink.pxd     |    2 +-
 cuda_bindings/cuda/bindings/nvjitlink.pyx     |    4 +-
 cuda_bindings/cuda/bindings/nvml.pxd          |    2 +-
 cuda_bindings/cuda/bindings/nvml.pyx          | 1197 ++++++++++++++++-
 cuda_bindings/cuda/bindings/nvrtc.pxd.in      |    4 +-
 cuda_bindings/cuda/bindings/nvrtc.pyx.in      |    2 +-
 cuda_bindings/cuda/bindings/nvvm.pxd          |    2 +-
 cuda_bindings/cuda/bindings/nvvm.pyx          |    4 +-
 cuda_bindings/cuda/bindings/runtime.pxd.in    |    2 +-
 cuda_bindings/cuda/bindings/runtime.pyx.in    |    6 +-
 .../core/_memory/_managed_memory_resource.pxd |    4 +-
 .../core/_memory/_managed_memory_resource.pyx |  173 ++-
 cuda_core/cuda/core/_program.pxd              |    2 +
 cuda_core/cuda/core/_program.pyx              |  123 +-
 cuda_core/cuda/core/_stream.pyx               |   24 +-
 cuda_core/docs/source/release/0.6.0-notes.rst |    5 +
 cuda_core/docs/source/release/0.7.x-notes.rst |   57 +
 cuda_core/pyproject.toml                      |    8 +-
 cuda_core/tests/conftest.py                   |    6 +
 cuda_core/tests/test_memory.py                |  149 ++
 cuda_core/tests/test_program.py               |   35 +
 pytest.ini                                    |    1 +
 63 files changed, 1952 insertions(+), 123 deletions(-)
 create mode 100644 cuda_core/docs/source/release/0.7.x-notes.rst

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 44ba5d5bf2..2fbb9d897e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,6 +19,7 @@ repos:
     hooks:
       - id: ruff-check
         args: [--fix, --show-fixes]
+        exclude: ^cuda_bindings/cuda/bindings/_internal/_fast_enum\.py$
       - id: ruff-format
 
   - repo: local
diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
index 6f5a2a4014..2127076caa 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 from cuda.bindings.cydriver cimport *
 
 {{if 'cuGetErrorString' in found_functions}}
diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
index 4dba6dfbc8..e7b4f463b6 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 {{if 'Windows' == platform.system()}}
 import os
 cimport cuda.bindings._lib.windll as windll
diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
index f1bbb53998..7d8fc40a20 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 from cuda.bindings.cynvrtc cimport *
 
 {{if 'nvrtcGetErrorString' in found_functions}}
diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
index 608aebd1af..2b88fde640 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 {{if 'Windows' == platform.system()}}
 import os
 cimport cuda.bindings._lib.windll as windll
diff --git a/cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd.in
index 05451b0b42..8f0339be21 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cyruntime.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 include "../cyruntime_types.pxi"
 
 include "../_lib/cyruntime/cyruntime.pxd"
diff --git a/cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in
index 6a8d1ab783..cccd4fc661 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cyruntime.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 include "../cyruntime_functions.pxi"
 
 import os
diff --git a/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pxd.in
index 804c7078ab..0af3f78b2b 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pxd.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cdef extern from "":
     """
     #define CUDA_API_PER_THREAD_DEFAULT_STREAM
diff --git a/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pyx.in
index f57fbbb126..bd0b42c0b3 100644
--- a/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pyx.in
+++ b/cuda_bindings/cuda/bindings/_bindings/cyruntime_ptds.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cdef extern from "":
     """
     #define CUDA_API_PER_THREAD_DEFAULT_STREAM
diff --git a/cuda_bindings/cuda/bindings/_internal/_fast_enum.py b/cuda_bindings/cuda/bindings/_internal/_fast_enum.py
index 33e3b1e12f..0958b55b8f 100644
--- a/cuda_bindings/cuda/bindings/_internal/_fast_enum.py
+++ b/cuda_bindings/cuda/bindings/_internal/_fast_enum.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 
 """
diff --git a/cuda_bindings/cuda/bindings/_internal/cufile.pxd b/cuda_bindings/cuda/bindings/_internal/cufile.pxd
index a2e7d560ce..4b1a09a182 100644
--- a/cuda_bindings/cuda/bindings/_internal/cufile.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/cufile.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ..cycufile cimport *
 
diff --git a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx
index eb38750f5e..cbb2c422ac 100644
--- a/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/cufile_linux.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uintptr_t
 import threading
diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd
index 84abf408d4..6c9670edee 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ..cynvjitlink cimport *
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
index 057b52f5eb..378efda1c6 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_linux.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uintptr_t
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
index 8967e7fe1c..976b824852 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvjitlink_windows.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvml.pxd b/cuda_bindings/cuda/bindings/_internal/nvml.pxd
index e62b29b2c8..d9ddec48fb 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvml.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/nvml.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ..cynvml cimport *
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx
index 84e17a4313..54e5d51748 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uintptr_t
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx
index dd71b4ae0b..309b5a3039 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm.pxd b/cuda_bindings/cuda/bindings/_internal/nvvm.pxd
index 00576dd8c9..c560367884 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvvm.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/nvvm.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ..cynvvm cimport *
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
index 2d03097235..f1d9febdb2 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvvm_linux.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uintptr_t
 
diff --git a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
index 6684402bfc..3dd11074b2 100644
--- a/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
+++ b/cuda_bindings/cuda/bindings/_internal/nvvm_windows.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/cufile.pxd b/cuda_bindings/cuda/bindings/cufile.pxd
index b330aa42f2..033da9ec84 100644
--- a/cuda_bindings/cuda/bindings/cufile.pxd
+++ b/cuda_bindings/cuda/bindings/cufile.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx
index 4ddc988fca..16c564e2a8 100644
--- a/cuda_bindings/cuda/bindings/cufile.pyx
+++ b/cuda_bindings/cuda/bindings/cufile.pyx
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 cimport cython  # NOQA
 from libc cimport errno
@@ -36,6 +36,33 @@ cdef __from_data(data, dtype_name, expected_dtype, lowpp_type):
     return lowpp_type.from_ptr(data.ctypes.data, not data.flags.writeable, data)
 
 
+cdef __from_buffer(buffer, size, lowpp_type):  # wrap a `size`-byte buffer-protocol object as a `lowpp_type` instance via lowpp_type.from_ptr
+    cdef Py_buffer view
+    if cpython.PyObject_GetBuffer(buffer, &view, cpython.PyBUF_SIMPLE) != 0:
+        raise TypeError("buffer argument does not support the buffer protocol")
+    try:
+        if view.itemsize != 1:
+            raise ValueError("buffer itemsize must be 1 byte")
+        if view.len != size:
+            raise ValueError(f"buffer length must be {size} bytes")
+        return lowpp_type.from_ptr(<intptr_t><void *>view.buf, view.readonly != 0, buffer)  # 2nd argument is the read-only flag (__from_data passes `not writeable`), so forward the view's own flag un-negated
+    finally:
+        cpython.PyBuffer_Release(&view)  # NOTE(review): the export is released while the raw pointer is kept; presumably from_ptr retains `buffer` as owner - confirm this is safe for resizable exporters
+
+
+cdef __getbuffer(object self, cpython.Py_buffer *buffer, void *ptr, int size, bint readonly):  # fill `buffer` as a 1-D view of `size` one-byte items starting at `ptr`
+    buffer.obj = self  # exporter recorded on the view
+    buffer.buf = <char *>ptr
+    buffer.len = size
+    buffer.readonly = readonly
+    buffer.itemsize = 1
+    buffer.format = 'b'
+    buffer.ndim = 1
+    buffer.shape = &buffer.len  # single dimension; its extent is read from the view's own `len` field
+    buffer.strides = &buffer.itemsize  # stride is read from the view's own `itemsize` field (1)
+    buffer.suboffsets = NULL
+    buffer.internal = NULL
+
 ###############################################################################
 # POD
 ###############################################################################
@@ -97,6 +124,12 @@ cdef class _py_anon_pod1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof((<CUfileDescr_t*>NULL).handle)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof((<CUfileDescr_t*>NULL).handle), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod1 *>malloc(sizeof((<CUfileDescr_t*>NULL).handle))
@@ -131,6 +164,11 @@ cdef class _py_anon_pod1:
             raise ValueError("This _py_anon_pod1 instance is read-only")
         self._ptr[0].handle = <void *><intptr_t>val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an _py_anon_pod1 instance with the memory from the given buffer, which must be exactly sizeof(CUfileDescr_t.handle) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof((<CUfileDescr_t*>NULL).handle), _py_anon_pod1)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod1 instance wrapping the given NumPy array.
@@ -231,6 +269,12 @@ cdef class _py_anon_pod3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof((<CUfileIOParams_t*>NULL).u.batch)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof((<CUfileIOParams_t*>NULL).u.batch), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod3 *>malloc(sizeof((<CUfileIOParams_t*>NULL).u.batch))
@@ -287,6 +331,11 @@ cdef class _py_anon_pod3:
             raise ValueError("This _py_anon_pod3 instance is read-only")
         self._ptr[0].size = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an _py_anon_pod3 instance with the memory from the given buffer, which must be exactly sizeof(CUfileIOParams_t.u.batch) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof((<CUfileIOParams_t*>NULL).u.batch), _py_anon_pod3)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod3 instance wrapping the given NumPy array.
@@ -390,6 +439,12 @@ cdef class IOEvents:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)  # delegate to self._data, forwarding the consumer's flags unchanged
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        cpython.PyBuffer_Release(buffer)  # hand the view back through the generic release call
+
     @property
     def cookie(self):
         """Union[~_numpy.intp, int]: """
@@ -442,6 +497,11 @@ cdef class IOEvents:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an IOEvents instance with the memory from the given buffer, interpreted via numpy.frombuffer as io_events_dtype records."""
+        return IOEvents.from_data(_numpy.frombuffer(buffer, dtype=io_events_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an IOEvents instance wrapping the given NumPy array.
@@ -543,6 +603,12 @@ cdef class OpCounter:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(CUfileOpCounter_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(CUfileOpCounter_t), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <CUfileOpCounter_t *>malloc(sizeof(CUfileOpCounter_t))
@@ -577,6 +643,11 @@ cdef class OpCounter:
             raise ValueError("This OpCounter instance is read-only")
         self._ptr[0].err = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an OpCounter instance with the memory from the given buffer, which must be exactly sizeof(CUfileOpCounter_t) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof(CUfileOpCounter_t), OpCounter)
+
     @staticmethod
     def from_data(data):
         """Create an OpCounter instance wrapping the given NumPy array.
@@ -707,6 +778,12 @@ cdef class PerGpuStats:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)  # delegate to self._data, forwarding the consumer's flags unchanged
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        cpython.PyBuffer_Release(buffer)  # hand the view back through the generic release call
+
     @property
     def uuid(self):
         """~_numpy.int8: (array of length 16)."""
@@ -1054,6 +1131,11 @@ cdef class PerGpuStats:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a PerGpuStats instance with the memory from the given buffer, interpreted via numpy.frombuffer as per_gpu_stats_dtype records."""
+        return PerGpuStats.from_data(_numpy.frombuffer(buffer, dtype=per_gpu_stats_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an PerGpuStats instance wrapping the given NumPy array.
@@ -1160,6 +1242,12 @@ cdef class Descr:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)  # delegate to self._data, forwarding the consumer's flags unchanged
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        cpython.PyBuffer_Release(buffer)  # hand the view back through the generic release call
+
     @property
     def type(self):
         """Union[~_numpy.int32, int]: """
@@ -1210,6 +1298,11 @@ cdef class Descr:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a Descr instance with the memory from the given buffer, interpreted via numpy.frombuffer as descr_dtype records."""
+        return Descr.from_data(_numpy.frombuffer(buffer, dtype=descr_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an Descr instance wrapping the given NumPy array.
@@ -1305,6 +1398,12 @@ cdef class _py_anon_pod2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof((<CUfileIOParams_t*>NULL).u)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof((<CUfileIOParams_t*>NULL).u), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod2 *>malloc(sizeof((<CUfileIOParams_t*>NULL).u))
@@ -1329,6 +1428,11 @@ cdef class _py_anon_pod2:
         cdef _py_anon_pod3 val_ = val
         memcpy(<void *>&(self._ptr[0].batch), <void *>(val_._get_ptr()), sizeof(_anon_pod3) * 1)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an _py_anon_pod2 instance with the memory from the given buffer, which must be exactly sizeof(CUfileIOParams_t.u) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof((<CUfileIOParams_t*>NULL).u), _py_anon_pod2)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod2 instance wrapping the given NumPy array.
@@ -1468,6 +1572,12 @@ cdef class StatsLevel1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(CUfileStatsLevel1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(CUfileStatsLevel1_t), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <CUfileStatsLevel1_t *>malloc(sizeof(CUfileStatsLevel1_t))
@@ -1974,6 +2084,11 @@ cdef class StatsLevel1:
             raise ValueError("This StatsLevel1 instance is read-only")
         self._ptr[0].last_batch_write_bytes = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a StatsLevel1 instance with the memory from the given buffer, which must be exactly sizeof(CUfileStatsLevel1_t) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof(CUfileStatsLevel1_t), StatsLevel1)
+
     @staticmethod
     def from_data(data):
         """Create an StatsLevel1 instance wrapping the given NumPy array.
@@ -2079,6 +2194,12 @@ cdef class IOParams:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)  # delegate to self._data, forwarding the consumer's flags unchanged
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        cpython.PyBuffer_Release(buffer)  # hand the view back through the generic release call
+
     @property
     def mode(self):
         """Union[~_numpy.int32, int]: """
@@ -2151,6 +2272,11 @@ cdef class IOParams:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an IOParams instance with the memory from the given buffer, interpreted via numpy.frombuffer as io_params_dtype records."""
+        return IOParams.from_data(_numpy.frombuffer(buffer, dtype=io_params_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an IOParams instance wrapping the given NumPy array.
@@ -2253,6 +2379,12 @@ cdef class StatsLevel2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(CUfileStatsLevel2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(CUfileStatsLevel2_t), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <CUfileStatsLevel2_t *>malloc(sizeof(CUfileStatsLevel2_t))
@@ -2311,6 +2443,11 @@ cdef class StatsLevel2:
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint64)
         memcpy(<void *>(&(self._ptr[0].write_size_kb_hist)), <void *>(arr.data), sizeof(uint64_t) * len(val))
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a StatsLevel2 instance with the memory from the given buffer, which must be exactly sizeof(CUfileStatsLevel2_t) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof(CUfileStatsLevel2_t), StatsLevel2)
+
     @staticmethod
     def from_data(data):
         """Create an StatsLevel2 instance wrapping the given NumPy array.
@@ -2410,6 +2547,12 @@ cdef class StatsLevel3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(CUfileStatsLevel3_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer-protocol export; `flags` is not consulted
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(CUfileStatsLevel3_t), self._readonly)  # expose the bytes at self._ptr; view is read-only iff self._readonly
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # counterpart of __getbuffer__
+        pass  # nothing to undo: the export only fills fields of the caller's Py_buffer
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <CUfileStatsLevel3_t *>malloc(sizeof(CUfileStatsLevel3_t))
@@ -2459,6 +2602,11 @@ cdef class StatsLevel3:
             raise ValueError("This StatsLevel3 instance is read-only")
         self._ptr[0].num_gpus = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a StatsLevel3 instance with the memory from the given buffer, which must be exactly sizeof(CUfileStatsLevel3_t) bytes long (ValueError otherwise)."""
+        return __from_buffer(buffer, sizeof(CUfileStatsLevel3_t), StatsLevel3)
+
     @staticmethod
     def from_data(data):
         """Create an StatsLevel3 instance wrapping the given NumPy array.
diff --git a/cuda_bindings/cuda/bindings/cycufile.pxd b/cuda_bindings/cuda/bindings/cycufile.pxd
index 05358ec3ac..ce3f6bc94b 100644
--- a/cuda_bindings/cuda/bindings/cycufile.pxd
+++ b/cuda_bindings/cuda/bindings/cycufile.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport uint32_t, uint64_t
 from libc.time cimport time_t
diff --git a/cuda_bindings/cuda/bindings/cycufile.pyx b/cuda_bindings/cuda/bindings/cycufile.pyx
index 6bb831666f..32d7ae07b8 100644
--- a/cuda_bindings/cuda/bindings/cycufile.pyx
+++ b/cuda_bindings/cuda/bindings/cycufile.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ._internal cimport cufile as _cufile
 
diff --git a/cuda_bindings/cuda/bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/cydriver.pxd.in
index 23681ca6e7..ccafc102f6 100644
--- a/cuda_bindings/cuda/bindings/cydriver.pxd.in
+++ b/cuda_bindings/cuda/bindings/cydriver.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 
 from libc.stdint cimport uint32_t, uint64_t
 
diff --git a/cuda_bindings/cuda/bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/cydriver.pyx.in
index 301563b490..d54c5140a6 100644
--- a/cuda_bindings/cuda/bindings/cydriver.pyx.in
+++ b/cuda_bindings/cuda/bindings/cydriver.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings._bindings.cydriver as cydriver
 
 {{if 'cuGetErrorString' in found_functions}}
diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pxd b/cuda_bindings/cuda/bindings/cynvjitlink.pxd
index eff351fef1..d9ad2ec49a 100644
--- a/cuda_bindings/cuda/bindings/cynvjitlink.pxd
+++ b/cuda_bindings/cuda/bindings/cynvjitlink.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uint32_t
 
diff --git a/cuda_bindings/cuda/bindings/cynvjitlink.pyx b/cuda_bindings/cuda/bindings/cynvjitlink.pyx
index e5e6b13210..669c6a3937 100644
--- a/cuda_bindings/cuda/bindings/cynvjitlink.pyx
+++ b/cuda_bindings/cuda/bindings/cynvjitlink.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ._internal cimport nvjitlink as _nvjitlink
 
diff --git a/cuda_bindings/cuda/bindings/cynvml.pxd b/cuda_bindings/cuda/bindings/cynvml.pxd
index 4765ef39b1..d95297e6cc 100644
--- a/cuda_bindings/cuda/bindings/cynvml.pxd
+++ b/cuda_bindings/cuda/bindings/cynvml.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport int64_t
 
diff --git a/cuda_bindings/cuda/bindings/cynvml.pyx b/cuda_bindings/cuda/bindings/cynvml.pyx
index b395a6ac53..200cf74e7d 100644
--- a/cuda_bindings/cuda/bindings/cynvml.pyx
+++ b/cuda_bindings/cuda/bindings/cynvml.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ._internal cimport nvml as _nvml
 
diff --git a/cuda_bindings/cuda/bindings/cynvrtc.pxd.in b/cuda_bindings/cuda/bindings/cynvrtc.pxd.in
index af5acab52d..a03d3a80f6 100644
--- a/cuda_bindings/cuda/bindings/cynvrtc.pxd.in
+++ b/cuda_bindings/cuda/bindings/cynvrtc.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 
 from libc.stdint cimport uint32_t, uint64_t
 
diff --git a/cuda_bindings/cuda/bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/cynvrtc.pyx.in
index 423efcf54c..9781cfde24 100644
--- a/cuda_bindings/cuda/bindings/cynvrtc.pyx.in
+++ b/cuda_bindings/cuda/bindings/cynvrtc.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings._bindings.cynvrtc as cynvrtc
 
 {{if 'nvrtcGetErrorString' in found_functions}}
diff --git a/cuda_bindings/cuda/bindings/cynvvm.pxd b/cuda_bindings/cuda/bindings/cynvvm.pxd
index 5960917803..9548196a9e 100644
--- a/cuda_bindings/cuda/bindings/cynvvm.pxd
+++ b/cuda_bindings/cuda/bindings/cynvvm.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 
 ###############################################################################
diff --git a/cuda_bindings/cuda/bindings/cynvvm.pyx b/cuda_bindings/cuda/bindings/cynvvm.pyx
index 7fe09d572a..24e1899004 100644
--- a/cuda_bindings/cuda/bindings/cynvvm.pyx
+++ b/cuda_bindings/cuda/bindings/cynvvm.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from ._internal cimport nvvm as _nvvm
 
diff --git a/cuda_bindings/cuda/bindings/cyruntime.pxd.in b/cuda_bindings/cuda/bindings/cyruntime.pxd.in
index 0b4344ab02..2b2cc4aae8 100644
--- a/cuda_bindings/cuda/bindings/cyruntime.pxd.in
+++ b/cuda_bindings/cuda/bindings/cyruntime.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 
 from libc.stdint cimport uint32_t, uint64_t
 
diff --git a/cuda_bindings/cuda/bindings/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/cyruntime.pyx.in
index 248346d274..44b1cb86a2 100644
--- a/cuda_bindings/cuda/bindings/cyruntime.pyx.in
+++ b/cuda_bindings/cuda/bindings/cyruntime.pyx.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings._bindings.cyruntime as cyruntime
 cimport cython
 
diff --git a/cuda_bindings/cuda/bindings/cyruntime_functions.pxi.in b/cuda_bindings/cuda/bindings/cyruntime_functions.pxi.in
index 3ca4474fc5..3be1573eab 100644
--- a/cuda_bindings/cuda/bindings/cyruntime_functions.pxi.in
+++ b/cuda_bindings/cuda/bindings/cyruntime_functions.pxi.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cdef extern from "cuda_runtime_api.h":
 
     {{if 'cudaDeviceReset' in found_functions}}
diff --git a/cuda_bindings/cuda/bindings/cyruntime_types.pxi.in b/cuda_bindings/cuda/bindings/cyruntime_types.pxi.in
index 3af28f67e7..c3166d195f 100644
--- a/cuda_bindings/cuda/bindings/cyruntime_types.pxi.in
+++ b/cuda_bindings/cuda/bindings/cyruntime_types.pxi.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 
 cdef extern from "vector_types.h":
 
diff --git a/cuda_bindings/cuda/bindings/driver.pxd.in b/cuda_bindings/cuda/bindings/driver.pxd.in
index ed992b8bd0..43d70e92f5 100644
--- a/cuda_bindings/cuda/bindings/driver.pxd.in
+++ b/cuda_bindings/cuda/bindings/driver.pxd.in
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings.cydriver as cydriver
 
 include "_lib/utils.pxd"
diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in
index 60f510dde2..fbff464c87 100644
--- a/cuda_bindings/cuda/bindings/driver.pyx.in
+++ b/cuda_bindings/cuda/bindings/driver.pyx.in
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0, generator version c185cc3. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 from typing import Any, Optional
 import cython
 import ctypes
diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pxd b/cuda_bindings/cuda/bindings/nvjitlink.pxd
index 0080a46415..5155c0fbb1 100644
--- a/cuda_bindings/cuda/bindings/nvjitlink.pxd
+++ b/cuda_bindings/cuda/bindings/nvjitlink.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t, uint32_t
 
diff --git a/cuda_bindings/cuda/bindings/nvjitlink.pyx b/cuda_bindings/cuda/bindings/nvjitlink.pyx
index 874ee55ce7..f50c76307b 100644
--- a/cuda_bindings/cuda/bindings/nvjitlink.pyx
+++ b/cuda_bindings/cuda/bindings/nvjitlink.pyx
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 cimport cython  # NOQA
 
diff --git a/cuda_bindings/cuda/bindings/nvml.pxd b/cuda_bindings/cuda/bindings/nvml.pxd
index 7b37a14122..a7644091e2 100644
--- a/cuda_bindings/cuda/bindings/nvml.pxd
+++ b/cuda_bindings/cuda/bindings/nvml.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1283+gc7bc6fa75. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/nvml.pyx b/cuda_bindings/cuda/bindings/nvml.pyx
index 990e098cec..f25485ad69 100644
--- a/cuda_bindings/cuda/bindings/nvml.pyx
+++ b/cuda_bindings/cuda/bindings/nvml.pyx
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1283+gc7bc6fa75. Do not modify it directly.
+# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 cimport cython  # NOQA
 
@@ -36,6 +36,33 @@ cdef __from_data(data, dtype_name, expected_dtype, lowpp_type):
     return lowpp_type.from_ptr(data.ctypes.data, not data.flags.writeable, data)
 
 
+cdef __from_buffer(buffer, size, lowpp_type):  # Wrap `buffer` (must be exactly `size` bytes, itemsize 1) via lowpp_type.from_ptr.
+    cdef Py_buffer view
+    if cpython.PyObject_GetBuffer(buffer, &view, cpython.PyBUF_SIMPLE) != 0:
+        raise TypeError("buffer argument does not support the buffer protocol")
+    try:
+        if view.itemsize != 1:
+            raise ValueError("buffer itemsize must be 1 byte")
+        if view.len != size:
+            raise ValueError(f"buffer length must be {size} bytes")
+        return lowpp_type.from_ptr(<intptr_t><void *>view.buf, view.readonly, buffer)  # 2nd arg is the read-only flag (__from_data passes `not writeable` here)
+    finally:
+        cpython.PyBuffer_Release(&view)  # NOTE(review): the raw pointer outlives this view; `buffer` is handed to from_ptr, presumably as owner - confirm it keeps the memory valid
+
+
+cdef __getbuffer(object self, cpython.Py_buffer *buffer, void *ptr, int size, bint readonly):  # Fill `buffer` as a 1-D view of `size` one-byte items starting at `ptr`.
+    buffer.buf = <char *>ptr
+    buffer.format = 'b'
+    buffer.internal = NULL
+    buffer.itemsize = 1
+    buffer.len = size
+    buffer.ndim = 1
+    buffer.obj = self  # record `self` as the exporting object of this view
+    buffer.readonly = readonly  # NOTE(review): request flags are not passed in, so a writable request on a read-only object is not rejected here
+    buffer.shape = &buffer.len  # single dimension: shape[0] aliases the view's own len field
+    buffer.strides = &buffer.itemsize  # single stride aliases the view's own itemsize field (1)
+    buffer.suboffsets = NULL
+
 
 cdef inline unsigned int NVML_VERSION_STRUCT(const unsigned int size, const unsigned int ver) nogil:
     return (size | (ver << 24))
@@ -2074,6 +2101,12 @@ cdef class PciInfoExt_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPciInfoExt_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPciInfoExt_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPciInfoExt_v1_t *>malloc(sizeof(nvmlPciInfoExt_v1_t))
@@ -2189,6 +2222,11 @@ cdef class PciInfoExt_v1:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].busId), <void *>ptr, 32)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PciInfoExt_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPciInfoExt_v1_t), PciInfoExt_v1)
+
     @staticmethod
     def from_data(data):
         """Create an PciInfoExt_v1 instance wrapping the given NumPy array.
@@ -2292,6 +2330,12 @@ cdef class PciInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPciInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPciInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPciInfo_t *>malloc(sizeof(nvmlPciInfo_t))
@@ -2389,6 +2433,11 @@ cdef class PciInfo:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].busId), <void *>ptr, 32)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PciInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPciInfo_t), PciInfo)
+
     @staticmethod
     def from_data(data):
         """Create an PciInfo instance wrapping the given NumPy array.
@@ -2487,6 +2536,12 @@ cdef class Utilization:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlUtilization_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlUtilization_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlUtilization_t *>malloc(sizeof(nvmlUtilization_t))
@@ -2521,6 +2576,11 @@ cdef class Utilization:
             raise ValueError("This Utilization instance is read-only")
         self._ptr[0].memory = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an Utilization instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlUtilization_t), Utilization)
+
     @staticmethod
     def from_data(data):
         """Create an Utilization instance wrapping the given NumPy array.
@@ -2620,6 +2680,12 @@ cdef class Memory:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlMemory_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlMemory_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlMemory_t *>malloc(sizeof(nvmlMemory_t))
@@ -2665,6 +2731,11 @@ cdef class Memory:
             raise ValueError("This Memory instance is read-only")
         self._ptr[0].used = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an Memory instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlMemory_t), Memory)
+
     @staticmethod
     def from_data(data):
         """Create an Memory instance wrapping the given NumPy array.
@@ -2766,6 +2837,12 @@ cdef class Memory_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlMemory_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlMemory_v2_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlMemory_v2_t *>malloc(sizeof(nvmlMemory_v2_t))
@@ -2833,6 +2910,11 @@ cdef class Memory_v2:
             raise ValueError("This Memory_v2 instance is read-only")
         self._ptr[0].used = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an Memory_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlMemory_v2_t), Memory_v2)
+
     @staticmethod
     def from_data(data):
         """Create an Memory_v2 instance wrapping the given NumPy array.
@@ -2932,6 +3014,12 @@ cdef class BAR1Memory:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlBAR1Memory_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlBAR1Memory_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlBAR1Memory_t *>malloc(sizeof(nvmlBAR1Memory_t))
@@ -2977,6 +3065,11 @@ cdef class BAR1Memory:
             raise ValueError("This BAR1Memory instance is read-only")
         self._ptr[0].bar1Used = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an BAR1Memory instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlBAR1Memory_t), BAR1Memory)
+
     @staticmethod
     def from_data(data):
         """Create an BAR1Memory instance wrapping the given NumPy array.
@@ -3081,6 +3174,12 @@ cdef class ProcessInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def pid(self):
         """Union[~_numpy.uint32, int]: """
@@ -3144,6 +3243,11 @@ cdef class ProcessInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ProcessInfo instance with the memory from the given buffer."""
+        return ProcessInfo.from_data(_numpy.frombuffer(buffer, dtype=process_info_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ProcessInfo instance wrapping the given NumPy array.
@@ -3252,6 +3356,12 @@ cdef class ProcessDetail_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def pid(self):
         """Union[~_numpy.uint32, int]: Process ID."""
@@ -3326,6 +3436,11 @@ cdef class ProcessDetail_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ProcessDetail_v1 instance with the memory from the given buffer."""
+        return ProcessDetail_v1.from_data(_numpy.frombuffer(buffer, dtype=process_detail_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ProcessDetail_v1 instance wrapping the given NumPy array.
@@ -3434,6 +3549,12 @@ cdef class DeviceAttributes:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlDeviceAttributes_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlDeviceAttributes_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlDeviceAttributes_t *>malloc(sizeof(nvmlDeviceAttributes_t))
@@ -3545,6 +3666,11 @@ cdef class DeviceAttributes:
             raise ValueError("This DeviceAttributes instance is read-only")
         self._ptr[0].memorySizeMB = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an DeviceAttributes instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlDeviceAttributes_t), DeviceAttributes)
+
     @staticmethod
     def from_data(data):
         """Create an DeviceAttributes instance wrapping the given NumPy array.
@@ -3642,6 +3768,12 @@ cdef class C2cModeInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlC2cModeInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlC2cModeInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlC2cModeInfo_v1_t *>malloc(sizeof(nvmlC2cModeInfo_v1_t))
@@ -3665,6 +3797,11 @@ cdef class C2cModeInfo_v1:
             raise ValueError("This C2cModeInfo_v1 instance is read-only")
         self._ptr[0].isC2cEnabled = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an C2cModeInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlC2cModeInfo_v1_t), C2cModeInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an C2cModeInfo_v1 instance wrapping the given NumPy array.
@@ -3766,6 +3903,12 @@ cdef class RowRemapperHistogramValues:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlRowRemapperHistogramValues_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlRowRemapperHistogramValues_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlRowRemapperHistogramValues_t *>malloc(sizeof(nvmlRowRemapperHistogramValues_t))
@@ -3833,6 +3976,11 @@ cdef class RowRemapperHistogramValues:
             raise ValueError("This RowRemapperHistogramValues instance is read-only")
         self._ptr[0].none = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an RowRemapperHistogramValues instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlRowRemapperHistogramValues_t), RowRemapperHistogramValues)
+
     @staticmethod
     def from_data(data):
         """Create an RowRemapperHistogramValues instance wrapping the given NumPy array.
@@ -3935,6 +4083,12 @@ cdef class BridgeChipInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def type(self):
         """Union[~_numpy.int32, int]: """
@@ -3976,6 +4130,11 @@ cdef class BridgeChipInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an BridgeChipInfo instance with the memory from the given buffer."""
+        return BridgeChipInfo.from_data(_numpy.frombuffer(buffer, dtype=bridge_chip_info_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an BridgeChipInfo instance wrapping the given NumPy array.
@@ -4077,6 +4236,12 @@ cdef class Value:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlValue_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlValue_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlValue_t *>malloc(sizeof(nvmlValue_t))
@@ -4166,6 +4331,11 @@ cdef class Value:
             raise ValueError("This Value instance is read-only")
         self._ptr[0].usVal = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an Value instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlValue_t), Value)
+
     @staticmethod
     def from_data(data):
         """Create an Value instance wrapping the given NumPy array.
@@ -4267,6 +4437,12 @@ cdef class _py_anon_pod0:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod0)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod0), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod0 *>malloc(sizeof(_anon_pod0))
@@ -4334,6 +4510,11 @@ cdef class _py_anon_pod0:
             raise ValueError("This _py_anon_pod0 instance is read-only")
         self._ptr[0].target = <nvmlThermalTarget_t><int>val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an _py_anon_pod0 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod0), _py_anon_pod0)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod0 instance wrapping the given NumPy array.
@@ -4434,6 +4615,12 @@ cdef class CoolerInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlCoolerInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlCoolerInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlCoolerInfo_v1_t *>malloc(sizeof(nvmlCoolerInfo_v1_t))
@@ -4490,6 +4677,11 @@ cdef class CoolerInfo_v1:
             raise ValueError("This CoolerInfo_v1 instance is read-only")
         self._ptr[0].target = <nvmlCoolerTarget_t><int>val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an CoolerInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlCoolerInfo_v1_t), CoolerInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an CoolerInfo_v1 instance wrapping the given NumPy array.
@@ -4592,6 +4784,12 @@ cdef class ClkMonFaultInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def clk_api_domain(self):
         """Union[~_numpy.uint32, int]: """
@@ -4633,6 +4831,11 @@ cdef class ClkMonFaultInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ClkMonFaultInfo instance with the memory from the given buffer."""
+        return ClkMonFaultInfo.from_data(_numpy.frombuffer(buffer, dtype=clk_mon_fault_info_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ClkMonFaultInfo instance wrapping the given NumPy array.
@@ -4738,6 +4941,12 @@ cdef class ClockOffset_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlClockOffset_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlClockOffset_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlClockOffset_v1_t *>malloc(sizeof(nvmlClockOffset_v1_t))
@@ -4816,6 +5025,11 @@ cdef class ClockOffset_v1:
             raise ValueError("This ClockOffset_v1 instance is read-only")
         self._ptr[0].maxClockOffsetMHz = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ClockOffset_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlClockOffset_v1_t), ClockOffset_v1)
+
     @staticmethod
     def from_data(data):
         """Create an ClockOffset_v1 instance wrapping the given NumPy array.
@@ -4922,6 +5136,12 @@ cdef class ProcessUtilizationSample:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def pid(self):
         """Union[~_numpy.uint32, int]: """
@@ -5007,6 +5227,11 @@ cdef class ProcessUtilizationSample:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ProcessUtilizationSample instance with the memory from the given buffer."""
+        return ProcessUtilizationSample.from_data(_numpy.frombuffer(buffer, dtype=process_utilization_sample_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ProcessUtilizationSample instance wrapping the given NumPy array.
@@ -5118,6 +5343,12 @@ cdef class ProcessUtilizationInfo_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def time_stamp(self):
         """Union[~_numpy.uint64, int]: CPU Timestamp in microseconds."""
@@ -5225,6 +5456,11 @@ cdef class ProcessUtilizationInfo_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ProcessUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return ProcessUtilizationInfo_v1.from_data(_numpy.frombuffer(buffer, dtype=process_utilization_info_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ProcessUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -5337,6 +5573,12 @@ cdef class EccSramErrorStatus_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlEccSramErrorStatus_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlEccSramErrorStatus_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlEccSramErrorStatus_v1_t *>malloc(sizeof(nvmlEccSramErrorStatus_v1_t))
@@ -5492,6 +5734,11 @@ cdef class EccSramErrorStatus_v1:
             raise ValueError("This EccSramErrorStatus_v1 instance is read-only")
         self._ptr[0].bThresholdExceeded = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an EccSramErrorStatus_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlEccSramErrorStatus_v1_t), EccSramErrorStatus_v1)
+
     @staticmethod
     def from_data(data):
         """Create an EccSramErrorStatus_v1 instance wrapping the given NumPy array.
@@ -5596,6 +5843,12 @@ cdef class PlatformInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPlatformInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPlatformInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPlatformInfo_v1_t *>malloc(sizeof(nvmlPlatformInfo_v1_t))
@@ -5708,6 +5961,11 @@ cdef class PlatformInfo_v1:
             raise ValueError("This PlatformInfo_v1 instance is read-only")
         self._ptr[0].moduleId = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PlatformInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPlatformInfo_v1_t), PlatformInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an PlatformInfo_v1 instance wrapping the given NumPy array.
@@ -5812,6 +6070,12 @@ cdef class PlatformInfo_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPlatformInfo_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPlatformInfo_v2_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPlatformInfo_v2_t *>malloc(sizeof(nvmlPlatformInfo_v2_t))
@@ -5924,6 +6188,11 @@ cdef class PlatformInfo_v2:
             raise ValueError("This PlatformInfo_v2 instance is read-only")
         self._ptr[0].moduleId = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PlatformInfo_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPlatformInfo_v2_t), PlatformInfo_v2)
+
     @staticmethod
     def from_data(data):
         """Create an PlatformInfo_v2 instance wrapping the given NumPy array.
@@ -6024,6 +6293,12 @@ cdef class _py_anon_pod1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod1)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod1), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod1 *>malloc(sizeof(_anon_pod1))
@@ -6080,6 +6355,11 @@ cdef class _py_anon_pod1:
             raise ValueError("This _py_anon_pod1 instance is read-only")
         self._ptr[0].decThreshold = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an _py_anon_pod1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod1), _py_anon_pod1)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod1 instance wrapping the given NumPy array.
@@ -6183,6 +6463,12 @@ cdef class VgpuPlacementList_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuPlacementList_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuPlacementList_v2_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuPlacementList_v2_t *>malloc(sizeof(nvmlVgpuPlacementList_v2_t))
@@ -6247,6 +6533,11 @@ cdef class VgpuPlacementList_v2:
             raise ValueError("This VgpuPlacementList_v2 instance is read-only")
         self._ptr[0].mode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuPlacementList_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuPlacementList_v2_t), VgpuPlacementList_v2)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuPlacementList_v2 instance wrapping the given NumPy array.
@@ -6346,6 +6637,12 @@ cdef class VgpuTypeBar1Info_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuTypeBar1Info_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuTypeBar1Info_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuTypeBar1Info_v1_t *>malloc(sizeof(nvmlVgpuTypeBar1Info_v1_t))
@@ -6380,6 +6677,11 @@ cdef class VgpuTypeBar1Info_v1:
             raise ValueError("This VgpuTypeBar1Info_v1 instance is read-only")
         self._ptr[0].bar1Size = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuTypeBar1Info_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuTypeBar1Info_v1_t), VgpuTypeBar1Info_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuTypeBar1Info_v1 instance wrapping the given NumPy array.
@@ -6490,6 +6792,12 @@ cdef class VgpuProcessUtilizationInfo_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def process_name(self):
         """~_numpy.int8: (array of length 64).Name of process running within the vGPU VM."""
@@ -6617,6 +6925,11 @@ cdef class VgpuProcessUtilizationInfo_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuProcessUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return VgpuProcessUtilizationInfo_v1.from_data(_numpy.frombuffer(buffer, dtype=vgpu_process_utilization_info_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an VgpuProcessUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -6718,6 +7031,12 @@ cdef class _py_anon_pod2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod2)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod2), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod2 *>malloc(sizeof(_anon_pod2))
@@ -6752,6 +7071,11 @@ cdef class _py_anon_pod2:
             raise ValueError("This _py_anon_pod2 instance is read-only")
         self._ptr[0].timeslice = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a _py_anon_pod2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod2), _py_anon_pod2)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod2 instance wrapping the given NumPy array.
@@ -6849,6 +7173,12 @@ cdef class _py_anon_pod3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod3)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod3), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod3 *>malloc(sizeof(_anon_pod3))
@@ -6872,6 +7202,11 @@ cdef class _py_anon_pod3:
             raise ValueError("This _py_anon_pod3 instance is read-only")
         self._ptr[0].timeslice = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a _py_anon_pod3 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod3), _py_anon_pod3)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod3 instance wrapping the given NumPy array.
@@ -6978,6 +7313,12 @@ cdef class VgpuSchedulerLogEntry:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def timestamp(self):
         """Union[~_numpy.uint64, int]: """
@@ -7063,6 +7404,11 @@ cdef class VgpuSchedulerLogEntry:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuSchedulerLogEntry instance with the memory from the given buffer."""
+        return VgpuSchedulerLogEntry.from_data(_numpy.frombuffer(buffer, dtype=vgpu_scheduler_log_entry_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerLogEntry instance wrapping the given NumPy array.
@@ -7164,6 +7510,12 @@ cdef class _py_anon_pod4:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod4)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod4), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod4 *>malloc(sizeof(_anon_pod4))
@@ -7198,6 +7550,11 @@ cdef class _py_anon_pod4:
             raise ValueError("This _py_anon_pod4 instance is read-only")
         self._ptr[0].frequency = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a _py_anon_pod4 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod4), _py_anon_pod4)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod4 instance wrapping the given NumPy array.
@@ -7295,6 +7652,12 @@ cdef class _py_anon_pod5:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(_anon_pod5)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(_anon_pod5), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <_anon_pod5 *>malloc(sizeof(_anon_pod5))
@@ -7318,6 +7681,11 @@ cdef class _py_anon_pod5:
             raise ValueError("This _py_anon_pod5 instance is read-only")
         self._ptr[0].timeslice = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a _py_anon_pod5 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(_anon_pod5), _py_anon_pod5)
+
     @staticmethod
     def from_data(data):
         """Create an _py_anon_pod5 instance wrapping the given NumPy array.
@@ -7422,6 +7790,12 @@ cdef class VgpuSchedulerCapabilities:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerCapabilities_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerCapabilities_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerCapabilities_t *>malloc(sizeof(nvmlVgpuSchedulerCapabilities_t))
@@ -7528,6 +7902,11 @@ cdef class VgpuSchedulerCapabilities:
             raise ValueError("This VgpuSchedulerCapabilities instance is read-only")
         self._ptr[0].minAvgFactorForARR = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuSchedulerCapabilities instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerCapabilities_t), VgpuSchedulerCapabilities)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerCapabilities instance wrapping the given NumPy array.
@@ -7631,6 +8010,12 @@ cdef class VgpuLicenseExpiry:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuLicenseExpiry_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuLicenseExpiry_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuLicenseExpiry_t *>malloc(sizeof(nvmlVgpuLicenseExpiry_t))
@@ -7720,6 +8105,11 @@ cdef class VgpuLicenseExpiry:
             raise ValueError("This VgpuLicenseExpiry instance is read-only")
         self._ptr[0].status = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuLicenseExpiry instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuLicenseExpiry_t), VgpuLicenseExpiry)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuLicenseExpiry instance wrapping the given NumPy array.
@@ -7823,6 +8213,12 @@ cdef class GridLicenseExpiry:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGridLicenseExpiry_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGridLicenseExpiry_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGridLicenseExpiry_t *>malloc(sizeof(nvmlGridLicenseExpiry_t))
@@ -7912,6 +8308,11 @@ cdef class GridLicenseExpiry:
             raise ValueError("This GridLicenseExpiry instance is read-only")
         self._ptr[0].status = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a GridLicenseExpiry instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGridLicenseExpiry_t), GridLicenseExpiry)
+
     @staticmethod
     def from_data(data):
         """Create an GridLicenseExpiry instance wrapping the given NumPy array.
@@ -8011,6 +8412,12 @@ cdef class VgpuTypeIdInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuTypeIdInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuTypeIdInfo_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuTypeIdInfo_v1_t *>malloc(sizeof(nvmlVgpuTypeIdInfo_v1_t))
@@ -8053,6 +8460,11 @@ cdef class VgpuTypeIdInfo_v1:
         self._ptr[0].vgpuCount = len(val)
         self._refs["vgpu_type_ids"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuTypeIdInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuTypeIdInfo_v1_t), VgpuTypeIdInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuTypeIdInfo_v1 instance wrapping the given NumPy array.
@@ -8152,6 +8564,12 @@ cdef class ActiveVgpuInstanceInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlActiveVgpuInstanceInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlActiveVgpuInstanceInfo_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlActiveVgpuInstanceInfo_v1_t *>malloc(sizeof(nvmlActiveVgpuInstanceInfo_v1_t))
@@ -8194,6 +8612,11 @@ cdef class ActiveVgpuInstanceInfo_v1:
         self._ptr[0].vgpuCount = len(val)
         self._refs["vgpu_instances"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ActiveVgpuInstanceInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlActiveVgpuInstanceInfo_v1_t), ActiveVgpuInstanceInfo_v1)  # Delegates to the shared __from_buffer helper with the struct's byte size and this class.
+
     @staticmethod
     def from_data(data):
         """Create an ActiveVgpuInstanceInfo_v1 instance wrapping the given NumPy array.
@@ -8297,6 +8720,12 @@ cdef class VgpuCreatablePlacementInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuCreatablePlacementInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuCreatablePlacementInfo_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuCreatablePlacementInfo_v1_t *>malloc(sizeof(nvmlVgpuCreatablePlacementInfo_v1_t))
@@ -8361,6 +8790,11 @@ cdef class VgpuCreatablePlacementInfo_v1:
         self._ptr[0].placementSize = len(val)
         self._refs["placement_ids"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuCreatablePlacementInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuCreatablePlacementInfo_v1_t), VgpuCreatablePlacementInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuCreatablePlacementInfo_v1 instance wrapping the given NumPy array.
@@ -8464,6 +8898,12 @@ cdef class HwbcEntry:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def hwbc_id(self):
         """Union[~_numpy.uint32, int]: """
@@ -8503,6 +8943,11 @@ cdef class HwbcEntry:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an HwbcEntry instance with the memory from the given buffer."""
+        return HwbcEntry.from_data(_numpy.frombuffer(buffer, dtype=hwbc_entry_dtype))  # Reinterprets the buffer with hwbc_entry_dtype, then wraps the array via from_data.
+
     @staticmethod
     def from_data(data):
         """Create an HwbcEntry instance wrapping the given NumPy array.
@@ -8604,6 +9049,12 @@ cdef class LedState:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlLedState_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlLedState_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlLedState_t *>malloc(sizeof(nvmlLedState_t))
@@ -8642,6 +9093,11 @@ cdef class LedState:
             raise ValueError("This LedState instance is read-only")
         self._ptr[0].color = <nvmlLedColor_t><int>val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an LedState instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlLedState_t), LedState)  # Delegates to the shared __from_buffer helper with the struct's byte size and this class.
+
     @staticmethod
     def from_data(data):
         """Create an LedState instance wrapping the given NumPy array.
@@ -8742,6 +9198,12 @@ cdef class UnitInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlUnitInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlUnitInfo_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlUnitInfo_t *>malloc(sizeof(nvmlUnitInfo_t))
@@ -8814,6 +9276,11 @@ cdef class UnitInfo:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].firmwareVersion), <void *>ptr, 96)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a UnitInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlUnitInfo_t), UnitInfo)
+
     @staticmethod
     def from_data(data):
         """Create an UnitInfo instance wrapping the given NumPy array.
@@ -8914,6 +9381,12 @@ cdef class PSUInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPSUInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPSUInfo_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPSUInfo_t *>malloc(sizeof(nvmlPSUInfo_t))
@@ -8974,6 +9447,11 @@ cdef class PSUInfo:
             raise ValueError("This PSUInfo instance is read-only")
         self._ptr[0].power = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a PSUInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPSUInfo_t), PSUInfo)
+
     @staticmethod
     def from_data(data):
         """Create an PSUInfo instance wrapping the given NumPy array.
@@ -9076,6 +9554,12 @@ cdef class UnitFanInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def speed(self):
         """Union[~_numpy.uint32, int]: """
@@ -9117,6 +9601,11 @@ cdef class UnitFanInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a UnitFanInfo instance with the memory from the given buffer."""
+        return UnitFanInfo.from_data(_numpy.frombuffer(buffer, dtype=unit_fan_info_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an UnitFanInfo instance wrapping the given NumPy array.
@@ -9221,6 +9710,12 @@ cdef class EventData:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlEventData_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlEventData_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlEventData_t *>malloc(sizeof(nvmlEventData_t))
@@ -9288,6 +9783,11 @@ cdef class EventData:
             raise ValueError("This EventData instance is read-only")
         self._ptr[0].computeInstanceId = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an EventData instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlEventData_t), EventData)  # Delegates to the shared __from_buffer helper with the struct's byte size and this class.
+
     @staticmethod
     def from_data(data):
         """Create an EventData instance wrapping the given NumPy array.
@@ -9390,6 +9890,12 @@ cdef class SystemEventData_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def event_type(self):
         """Union[~_numpy.uint64, int]: Information about what specific system event occurred."""
@@ -9431,6 +9937,11 @@ cdef class SystemEventData_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a SystemEventData_v1 instance with the memory from the given buffer."""
+        return SystemEventData_v1.from_data(_numpy.frombuffer(buffer, dtype=system_event_data_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an SystemEventData_v1 instance wrapping the given NumPy array.
@@ -9537,6 +10048,12 @@ cdef class AccountingStats:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlAccountingStats_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlAccountingStats_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlAccountingStats_t *>malloc(sizeof(nvmlAccountingStats_t))
@@ -9615,6 +10132,11 @@ cdef class AccountingStats:
             raise ValueError("This AccountingStats instance is read-only")
         self._ptr[0].isRunning = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an AccountingStats instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlAccountingStats_t), AccountingStats)  # Delegates to the shared __from_buffer helper with the struct's byte size and this class.
+
     @staticmethod
     def from_data(data):
         """Create an AccountingStats instance wrapping the given NumPy array.
@@ -9723,6 +10245,12 @@ cdef class EncoderSessionInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def session_id(self):
         """Union[~_numpy.uint32, int]: """
@@ -9830,6 +10358,11 @@ cdef class EncoderSessionInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an EncoderSessionInfo instance with the memory from the given buffer."""
+        return EncoderSessionInfo.from_data(_numpy.frombuffer(buffer, dtype=encoder_session_info_dtype))  # Reinterprets the buffer with encoder_session_info_dtype, then wraps the array via from_data.
+
     @staticmethod
     def from_data(data):
         """Create an EncoderSessionInfo instance wrapping the given NumPy array.
@@ -9932,6 +10465,12 @@ cdef class FBCStats:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlFBCStats_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlFBCStats_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlFBCStats_t *>malloc(sizeof(nvmlFBCStats_t))
@@ -9977,6 +10516,11 @@ cdef class FBCStats:
             raise ValueError("This FBCStats instance is read-only")
         self._ptr[0].averageLatency = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an FBCStats instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlFBCStats_t), FBCStats)  # Delegates to the shared __from_buffer helper with the struct's byte size and this class.
+
     @staticmethod
     def from_data(data):
         """Create an FBCStats instance wrapping the given NumPy array.
@@ -10089,6 +10633,12 @@ cdef class FBCSessionInfo:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export delegated to self._data; `flags` is forwarded unchanged.
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # Release hook: hands the view to PyBuffer_Release.
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): the view was filled from self._data, so buffer.obj presumably names self._data, not self -- confirm this hook is reachable and cannot re-enter itself.
+
     @property
     def session_id(self):
         """Union[~_numpy.uint32, int]: """
@@ -10240,6 +10790,11 @@ cdef class FBCSessionInfo:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an FBCSessionInfo instance with the memory from the given buffer."""
+        return FBCSessionInfo.from_data(_numpy.frombuffer(buffer, dtype=fbc_session_info_dtype))  # Reinterprets the buffer with fbc_session_info_dtype, then wraps the array via from_data.
+
     @staticmethod
     def from_data(data):
         """Create an FBCSessionInfo instance wrapping the given NumPy array.
@@ -10341,6 +10896,12 @@ cdef class ConfComputeSystemCaps:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlConfComputeSystemCaps_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlConfComputeSystemCaps_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlConfComputeSystemCaps_t *>malloc(sizeof(nvmlConfComputeSystemCaps_t))
@@ -10375,6 +10936,11 @@ cdef class ConfComputeSystemCaps:
             raise ValueError("This ConfComputeSystemCaps instance is read-only")
         self._ptr[0].gpusCaps = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a ConfComputeSystemCaps instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlConfComputeSystemCaps_t), ConfComputeSystemCaps)
+
     @staticmethod
     def from_data(data):
         """Create an ConfComputeSystemCaps instance wrapping the given NumPy array.
@@ -10474,6 +11040,12 @@ cdef class ConfComputeSystemState:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlConfComputeSystemState_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlConfComputeSystemState_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlConfComputeSystemState_t *>malloc(sizeof(nvmlConfComputeSystemState_t))
@@ -10519,6 +11091,11 @@ cdef class ConfComputeSystemState:
             raise ValueError("This ConfComputeSystemState instance is read-only")
         self._ptr[0].devToolsMode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a ConfComputeSystemState instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlConfComputeSystemState_t), ConfComputeSystemState)
+
     @staticmethod
     def from_data(data):
         """Create an ConfComputeSystemState instance wrapping the given NumPy array.
@@ -10620,6 +11197,12 @@ cdef class SystemConfComputeSettings_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlSystemConfComputeSettings_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlSystemConfComputeSettings_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlSystemConfComputeSettings_v1_t *>malloc(sizeof(nvmlSystemConfComputeSettings_v1_t))
@@ -10687,6 +11270,11 @@ cdef class SystemConfComputeSettings_v1:
             raise ValueError("This SystemConfComputeSettings_v1 instance is read-only")
         self._ptr[0].multiGpuMode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a SystemConfComputeSettings_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlSystemConfComputeSettings_v1_t), SystemConfComputeSettings_v1)
+
     @staticmethod
     def from_data(data):
         """Create an SystemConfComputeSettings_v1 instance wrapping the given NumPy array.
@@ -10785,6 +11373,12 @@ cdef class ConfComputeMemSizeInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlConfComputeMemSizeInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # Buffer-protocol export of the struct at self._ptr via the shared __getbuffer helper.
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlConfComputeMemSizeInfo_t), self._readonly)  # NOTE(review): `flags` is not forwarded, so a PyBUF_WRITABLE request on a read-only instance presumably cannot be rejected -- confirm.
+
+    def __releasebuffer__(self, Py_buffer *buffer):  # No-op release hook: nothing is freed per view here.
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlConfComputeMemSizeInfo_t *>malloc(sizeof(nvmlConfComputeMemSizeInfo_t))
@@ -10819,6 +11413,11 @@ cdef class ConfComputeMemSizeInfo:
             raise ValueError("This ConfComputeMemSizeInfo instance is read-only")
         self._ptr[0].unprotectedMemSizeKib = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a ConfComputeMemSizeInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlConfComputeMemSizeInfo_t), ConfComputeMemSizeInfo)
+
     @staticmethod
     def from_data(data):
         """Create an ConfComputeMemSizeInfo instance wrapping the given NumPy array.
@@ -10919,6 +11518,12 @@ cdef class ConfComputeGpuCertificate:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlConfComputeGpuCertificate_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlConfComputeGpuCertificate_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlConfComputeGpuCertificate_t *>malloc(sizeof(nvmlConfComputeGpuCertificate_t))
@@ -10975,6 +11580,11 @@ cdef class ConfComputeGpuCertificate:
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].attestationCertChain)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a ConfComputeGpuCertificate instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlConfComputeGpuCertificate_t), ConfComputeGpuCertificate)
+
     @staticmethod
     def from_data(data):
         """Create an ConfComputeGpuCertificate instance wrapping the given NumPy array.
@@ -11077,6 +11687,12 @@ cdef class ConfComputeGpuAttestationReport:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlConfComputeGpuAttestationReport_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlConfComputeGpuAttestationReport_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlConfComputeGpuAttestationReport_t *>malloc(sizeof(nvmlConfComputeGpuAttestationReport_t))
@@ -11161,6 +11777,11 @@ cdef class ConfComputeGpuAttestationReport:
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].cecAttestationReport)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a ConfComputeGpuAttestationReport instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlConfComputeGpuAttestationReport_t), ConfComputeGpuAttestationReport)
+
     @staticmethod
     def from_data(data):
         """Create an ConfComputeGpuAttestationReport instance wrapping the given NumPy array.
@@ -11263,6 +11884,12 @@ cdef class GpuFabricInfo_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuFabricInfo_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuFabricInfo_v2_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuFabricInfo_v2_t *>malloc(sizeof(nvmlGpuFabricInfo_v2_t))
@@ -11347,6 +11974,11 @@ cdef class GpuFabricInfo_v2:
             raise ValueError("This GpuFabricInfo_v2 instance is read-only")
         self._ptr[0].healthMask = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a GpuFabricInfo_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuFabricInfo_v2_t), GpuFabricInfo_v2)
+
     @staticmethod
     def from_data(data):
         """Create an GpuFabricInfo_v2 instance wrapping the given NumPy array.
@@ -11446,6 +12078,12 @@ cdef class NvlinkSupportedBwModes_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvlinkSupportedBwModes_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvlinkSupportedBwModes_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvlinkSupportedBwModes_v1_t *>malloc(sizeof(nvmlNvlinkSupportedBwModes_v1_t))
@@ -11491,6 +12129,11 @@ cdef class NvlinkSupportedBwModes_v1:
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].bwModes)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an NvlinkSupportedBwModes_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvlinkSupportedBwModes_v1_t), NvlinkSupportedBwModes_v1)
+
     @staticmethod
     def from_data(data):
         """Create an NvlinkSupportedBwModes_v1 instance wrapping the given NumPy array.
@@ -11590,6 +12233,12 @@ cdef class NvlinkGetBwMode_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvlinkGetBwMode_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvlinkGetBwMode_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvlinkGetBwMode_v1_t *>malloc(sizeof(nvmlNvlinkGetBwMode_v1_t))
@@ -11635,6 +12284,11 @@ cdef class NvlinkGetBwMode_v1:
             raise ValueError("This NvlinkGetBwMode_v1 instance is read-only")
         self._ptr[0].bwMode = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an NvlinkGetBwMode_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvlinkGetBwMode_v1_t), NvlinkGetBwMode_v1)
+
     @staticmethod
     def from_data(data):
         """Create an NvlinkGetBwMode_v1 instance wrapping the given NumPy array.
@@ -11734,6 +12388,12 @@ cdef class NvlinkSetBwMode_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvlinkSetBwMode_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvlinkSetBwMode_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvlinkSetBwMode_v1_t *>malloc(sizeof(nvmlNvlinkSetBwMode_v1_t))
@@ -11779,6 +12439,11 @@ cdef class NvlinkSetBwMode_v1:
             raise ValueError("This NvlinkSetBwMode_v1 instance is read-only")
         self._ptr[0].bwMode = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an NvlinkSetBwMode_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvlinkSetBwMode_v1_t), NvlinkSetBwMode_v1)
+
     @staticmethod
     def from_data(data):
         """Create an NvlinkSetBwMode_v1 instance wrapping the given NumPy array.
@@ -11877,6 +12542,12 @@ cdef class VgpuVersion:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuVersion_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuVersion_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuVersion_t *>malloc(sizeof(nvmlVgpuVersion_t))
@@ -11911,6 +12582,11 @@ cdef class VgpuVersion:
             raise ValueError("This VgpuVersion instance is read-only")
         self._ptr[0].maxVersion = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a VgpuVersion instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuVersion_t), VgpuVersion)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuVersion instance wrapping the given NumPy array.
@@ -12017,6 +12693,12 @@ cdef class VgpuMetadata:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuMetadata_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuMetadata_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuMetadata_t *>malloc(sizeof(nvmlVgpuMetadata_t))
@@ -12140,6 +12822,11 @@ cdef class VgpuMetadata:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].opaqueData), <void *>ptr, 4)
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a VgpuMetadata instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuMetadata_t), VgpuMetadata)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuMetadata instance wrapping the given NumPy array.
@@ -12238,6 +12925,12 @@ cdef class VgpuPgpuCompatibility:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuPgpuCompatibility_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuPgpuCompatibility_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuPgpuCompatibility_t *>malloc(sizeof(nvmlVgpuPgpuCompatibility_t))
@@ -12272,6 +12965,11 @@ cdef class VgpuPgpuCompatibility:
             raise ValueError("This VgpuPgpuCompatibility instance is read-only")
         self._ptr[0].compatibilityLimitCode = <nvmlVgpuPgpuCompatibilityLimitCode_t><int>val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a VgpuPgpuCompatibility instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuPgpuCompatibility_t), VgpuPgpuCompatibility)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuPgpuCompatibility instance wrapping the given NumPy array.
@@ -12374,6 +13072,12 @@ cdef class GpuInstancePlacement:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def start(self):
         """Union[~_numpy.uint32, int]: """
@@ -12415,6 +13119,11 @@ cdef class GpuInstancePlacement:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a GpuInstancePlacement instance from the given buffer, interpreted as an array of ``gpu_instance_placement_dtype`` records."""
+        return GpuInstancePlacement.from_data(_numpy.frombuffer(buffer, dtype=gpu_instance_placement_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an GpuInstancePlacement instance wrapping the given NumPy array.
@@ -12527,6 +13236,12 @@ cdef class GpuInstanceProfileInfo_v3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuInstanceProfileInfo_v3_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuInstanceProfileInfo_v3_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuInstanceProfileInfo_v3_t *>malloc(sizeof(nvmlGpuInstanceProfileInfo_v3_t))
@@ -12686,6 +13401,11 @@ cdef class GpuInstanceProfileInfo_v3:
             raise ValueError("This GpuInstanceProfileInfo_v3 instance is read-only")
         self._ptr[0].capabilities = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a GpuInstanceProfileInfo_v3 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuInstanceProfileInfo_v3_t), GpuInstanceProfileInfo_v3)
+
     @staticmethod
     def from_data(data):
         """Create an GpuInstanceProfileInfo_v3 instance wrapping the given NumPy array.
@@ -12788,6 +13508,12 @@ cdef class ComputeInstancePlacement:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def start(self):
         """Union[~_numpy.uint32, int]: """
@@ -12829,6 +13555,11 @@ cdef class ComputeInstancePlacement:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a ComputeInstancePlacement instance from the given buffer, interpreted as an array of ``compute_instance_placement_dtype`` records."""
+        return ComputeInstancePlacement.from_data(_numpy.frombuffer(buffer, dtype=compute_instance_placement_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an ComputeInstancePlacement instance wrapping the given NumPy array.
@@ -12939,6 +13670,12 @@ cdef class ComputeInstanceProfileInfo_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlComputeInstanceProfileInfo_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlComputeInstanceProfileInfo_v2_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlComputeInstanceProfileInfo_v2_t *>malloc(sizeof(nvmlComputeInstanceProfileInfo_v2_t))
@@ -13076,6 +13813,11 @@ cdef class ComputeInstanceProfileInfo_v2:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].name), <void *>ptr, 96)
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a ComputeInstanceProfileInfo_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlComputeInstanceProfileInfo_v2_t), ComputeInstanceProfileInfo_v2)
+
     @staticmethod
     def from_data(data):
         """Create an ComputeInstanceProfileInfo_v2 instance wrapping the given NumPy array.
@@ -13184,6 +13926,12 @@ cdef class ComputeInstanceProfileInfo_v3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlComputeInstanceProfileInfo_v3_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlComputeInstanceProfileInfo_v3_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlComputeInstanceProfileInfo_v3_t *>malloc(sizeof(nvmlComputeInstanceProfileInfo_v3_t))
@@ -13332,6 +14080,11 @@ cdef class ComputeInstanceProfileInfo_v3:
             raise ValueError("This ComputeInstanceProfileInfo_v3 instance is read-only")
         self._ptr[0].capabilities = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a ComputeInstanceProfileInfo_v3 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlComputeInstanceProfileInfo_v3_t), ComputeInstanceProfileInfo_v3)
+
     @staticmethod
     def from_data(data):
         """Create an ComputeInstanceProfileInfo_v3 instance wrapping the given NumPy array.
@@ -13430,6 +14183,12 @@ cdef class DeviceAddressingMode_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlDeviceAddressingMode_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlDeviceAddressingMode_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlDeviceAddressingMode_v1_t *>malloc(sizeof(nvmlDeviceAddressingMode_v1_t))
@@ -13464,6 +14223,11 @@ cdef class DeviceAddressingMode_v1:
             raise ValueError("This DeviceAddressingMode_v1 instance is read-only")
         self._ptr[0].value = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a DeviceAddressingMode_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlDeviceAddressingMode_v1_t), DeviceAddressingMode_v1)
+
     @staticmethod
     def from_data(data):
         """Create an DeviceAddressingMode_v1 instance wrapping the given NumPy array.
@@ -13563,6 +14327,12 @@ cdef class RepairStatus_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlRepairStatus_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlRepairStatus_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlRepairStatus_v1_t *>malloc(sizeof(nvmlRepairStatus_v1_t))
@@ -13608,6 +14378,11 @@ cdef class RepairStatus_v1:
             raise ValueError("This RepairStatus_v1 instance is read-only")
         self._ptr[0].bTpcRepairPending = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a RepairStatus_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlRepairStatus_v1_t), RepairStatus_v1)
+
     @staticmethod
     def from_data(data):
         """Create an RepairStatus_v1 instance wrapping the given NumPy array.
@@ -13707,6 +14482,12 @@ cdef class DevicePowerMizerModes_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlDevicePowerMizerModes_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlDevicePowerMizerModes_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlDevicePowerMizerModes_v1_t *>malloc(sizeof(nvmlDevicePowerMizerModes_v1_t))
@@ -13752,6 +14533,11 @@ cdef class DevicePowerMizerModes_v1:
             raise ValueError("This DevicePowerMizerModes_v1 instance is read-only")
         self._ptr[0].supportedPowerMizerModes = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a DevicePowerMizerModes_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlDevicePowerMizerModes_v1_t), DevicePowerMizerModes_v1)
+
     @staticmethod
     def from_data(data):
         """Create an DevicePowerMizerModes_v1 instance wrapping the given NumPy array.
@@ -13859,6 +14645,12 @@ cdef class EccSramUniqueUncorrectedErrorEntry_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def unit(self):
         """Union[~_numpy.uint32, int]: the SRAM unit index"""
@@ -13955,6 +14747,11 @@ cdef class EccSramUniqueUncorrectedErrorEntry_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an EccSramUniqueUncorrectedErrorEntry_v1 instance from the given buffer, interpreted as an array of ``ecc_sram_unique_uncorrected_error_entry_v1_dtype`` records."""
+        return EccSramUniqueUncorrectedErrorEntry_v1.from_data(_numpy.frombuffer(buffer, dtype=ecc_sram_unique_uncorrected_error_entry_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an EccSramUniqueUncorrectedErrorEntry_v1 instance wrapping the given NumPy array.
@@ -14061,6 +14858,12 @@ cdef class GpuFabricInfo_v3:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuFabricInfo_v3_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuFabricInfo_v3_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuFabricInfo_v3_t *>malloc(sizeof(nvmlGpuFabricInfo_v3_t))
@@ -14156,6 +14959,11 @@ cdef class GpuFabricInfo_v3:
             raise ValueError("This GpuFabricInfo_v3 instance is read-only")
         self._ptr[0].healthSummary = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a GpuFabricInfo_v3 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuFabricInfo_v3_t), GpuFabricInfo_v3)
+
     @staticmethod
     def from_data(data):
         """Create an GpuFabricInfo_v3 instance wrapping the given NumPy array.
@@ -14254,6 +15062,12 @@ cdef class NvLinkInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvLinkInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvLinkInfo_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvLinkInfo_v1_t *>malloc(sizeof(nvmlNvLinkInfo_v1_t))
@@ -14288,6 +15102,11 @@ cdef class NvLinkInfo_v1:
             raise ValueError("This NvLinkInfo_v1 instance is read-only")
         self._ptr[0].isNvleEnabled = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an NvLinkInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvLinkInfo_v1_t), NvLinkInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an NvLinkInfo_v1 instance wrapping the given NumPy array.
@@ -14388,6 +15207,12 @@ cdef class NvlinkFirmwareVersion:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvlinkFirmwareVersion_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvlinkFirmwareVersion_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvlinkFirmwareVersion_t *>malloc(sizeof(nvmlNvlinkFirmwareVersion_t))
@@ -14444,6 +15269,11 @@ cdef class NvlinkFirmwareVersion:
             raise ValueError("This NvlinkFirmwareVersion instance is read-only")
         self._ptr[0].subMinor = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an NvlinkFirmwareVersion instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvlinkFirmwareVersion_t), NvlinkFirmwareVersion)
+
     @staticmethod
     def from_data(data):
         """Create an NvlinkFirmwareVersion instance wrapping the given NumPy array.
@@ -14541,6 +15371,12 @@ cdef class PRMCounterInput_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPRMCounterInput_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPRMCounterInput_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPRMCounterInput_v1_t *>malloc(sizeof(nvmlPRMCounterInput_v1_t))
@@ -14564,6 +15400,11 @@ cdef class PRMCounterInput_v1:
             raise ValueError("This PRMCounterInput_v1 instance is read-only")
         self._ptr[0].localPort = val
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a PRMCounterInput_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPRMCounterInput_v1_t), PRMCounterInput_v1)
+
     @staticmethod
     def from_data(data):
         """Create an PRMCounterInput_v1 instance wrapping the given NumPy array.
@@ -14662,6 +15503,12 @@ cdef class ExcludedDeviceInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlExcludedDeviceInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlExcludedDeviceInfo_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlExcludedDeviceInfo_t *>malloc(sizeof(nvmlExcludedDeviceInfo_t))
@@ -14701,6 +15548,11 @@ cdef class ExcludedDeviceInfo:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].uuid), <void *>ptr, 80)
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create an ExcludedDeviceInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlExcludedDeviceInfo_t), ExcludedDeviceInfo)
+
     @staticmethod
     def from_data(data):
         """Create an ExcludedDeviceInfo instance wrapping the given NumPy array.
@@ -14803,6 +15655,12 @@ cdef class ProcessDetailList_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlProcessDetailList_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlProcessDetailList_v1_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlProcessDetailList_v1_t *>malloc(sizeof(nvmlProcessDetailList_v1_t))
@@ -14853,6 +15711,11 @@ cdef class ProcessDetailList_v1:
         self._ptr[0].numProcArrayEntries = len(arr)
         self._refs["proc_array"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a ProcessDetailList_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlProcessDetailList_v1_t), ProcessDetailList_v1)
+
     @staticmethod
     def from_data(data):
         """Create an ProcessDetailList_v1 instance wrapping the given NumPy array.
@@ -14952,6 +15815,12 @@ cdef class BridgeChipHierarchy:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlBridgeChipHierarchy_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: export the C struct behind self._ptr
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlBridgeChipHierarchy_t), self._readonly)  # NOTE(review): `flags` is not forwarded -- confirm a PyBUF_WRITABLE request on a read-only instance is still rejected
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass  # no-op; NOTE(review): confirm self._ptr cannot be replaced or freed while a view is exported
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlBridgeChipHierarchy_t *>malloc(sizeof(nvmlBridgeChipHierarchy_t))
@@ -14981,6 +15850,11 @@ cdef class BridgeChipHierarchy:
             return
         memcpy(<void *>&(self._ptr[0].bridgeChipInfo), <void *>(val_._get_ptr()), sizeof(nvmlBridgeChipInfo_t) * self._ptr[0].bridgeCount)
 
+    @staticmethod
+    def from_buffer(buffer):  # thin wrapper: construction is done by the shared __from_buffer helper
+        """Create a BridgeChipHierarchy instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlBridgeChipHierarchy_t), BridgeChipHierarchy)
+
     @staticmethod
     def from_data(data):
         """Create an BridgeChipHierarchy instance wrapping the given NumPy array.
@@ -15083,6 +15957,12 @@ cdef class Sample:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def time_stamp(self):
         """Union[~_numpy.uint64, int]: """
@@ -15122,6 +16002,11 @@ cdef class Sample:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a Sample instance from the given buffer, interpreted as an array of ``sample_dtype`` records."""
+        return Sample.from_data(_numpy.frombuffer(buffer, dtype=sample_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an Sample instance wrapping the given NumPy array.
@@ -15231,6 +16116,12 @@ cdef class VgpuInstanceUtilizationSample:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def vgpu_instance(self):
         """Union[~_numpy.uint32, int]: """
@@ -15308,6 +16199,11 @@ cdef class VgpuInstanceUtilizationSample:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create a VgpuInstanceUtilizationSample instance from the given buffer, interpreted as an array of ``vgpu_instance_utilization_sample_dtype`` records."""
+        return VgpuInstanceUtilizationSample.from_data(_numpy.frombuffer(buffer, dtype=vgpu_instance_utilization_sample_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an VgpuInstanceUtilizationSample instance wrapping the given NumPy array.
@@ -15419,6 +16315,12 @@ cdef class VgpuInstanceUtilizationInfo_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):  # buffer protocol: forward the request to self._data
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)  # NOTE(review): __getbuffer__ lets self._data fill the view, so buffer.obj is presumably self._data -- confirm this hook is reachable and cannot double-release
+
     @property
     def time_stamp(self):
         """Union[~_numpy.uint64, int]: CPU Timestamp in microseconds."""
@@ -15514,6 +16416,11 @@ cdef class VgpuInstanceUtilizationInfo_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuInstanceUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return VgpuInstanceUtilizationInfo_v1.from_data(_numpy.frombuffer(buffer, dtype=vgpu_instance_utilization_info_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an VgpuInstanceUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -15624,6 +16531,12 @@ cdef class FieldValue:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def field_id(self):
         """Union[~_numpy.uint32, int]: """
@@ -15718,6 +16631,11 @@ cdef class FieldValue:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an FieldValue instance with the memory from the given buffer."""
+        return FieldValue.from_data(_numpy.frombuffer(buffer, dtype=field_value_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an FieldValue instance wrapping the given NumPy array.
@@ -15820,6 +16738,12 @@ cdef class PRMCounterValue_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlPRMCounterValue_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlPRMCounterValue_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlPRMCounterValue_v1_t *>malloc(sizeof(nvmlPRMCounterValue_v1_t))
@@ -15866,6 +16790,11 @@ cdef class PRMCounterValue_v1:
             raise ValueError("This PRMCounterValue_v1 instance is read-only")
         self._ptr[0].outputType = <nvmlValueType_t><int>val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PRMCounterValue_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlPRMCounterValue_v1_t), PRMCounterValue_v1)
+
     @staticmethod
     def from_data(data):
         """Create an PRMCounterValue_v1 instance wrapping the given NumPy array.
@@ -15964,6 +16893,12 @@ cdef class GpuThermalSettings:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuThermalSettings_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuThermalSettings_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuThermalSettings_t *>malloc(sizeof(nvmlGpuThermalSettings_t))
@@ -16001,6 +16936,11 @@ cdef class GpuThermalSettings:
             raise ValueError("This GpuThermalSettings instance is read-only")
         self._ptr[0].count = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an GpuThermalSettings instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuThermalSettings_t), GpuThermalSettings)
+
     @staticmethod
     def from_data(data):
         """Create an GpuThermalSettings instance wrapping the given NumPy array.
@@ -16100,6 +17040,12 @@ cdef class ClkMonStatus:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlClkMonStatus_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlClkMonStatus_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlClkMonStatus_t *>malloc(sizeof(nvmlClkMonStatus_t))
@@ -16140,6 +17086,11 @@ cdef class ClkMonStatus:
             raise ValueError("This ClkMonStatus instance is read-only")
         self._ptr[0].bGlobalStatus = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ClkMonStatus instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlClkMonStatus_t), ClkMonStatus)
+
     @staticmethod
     def from_data(data):
         """Create an ClkMonStatus instance wrapping the given NumPy array.
@@ -16242,6 +17193,12 @@ cdef class ProcessesUtilizationInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlProcessesUtilizationInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlProcessesUtilizationInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlProcessesUtilizationInfo_v1_t *>malloc(sizeof(nvmlProcessesUtilizationInfo_v1_t))
@@ -16292,6 +17249,11 @@ cdef class ProcessesUtilizationInfo_v1:
         self._ptr[0].processSamplesCount = len(arr)
         self._refs["proc_util_array"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ProcessesUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlProcessesUtilizationInfo_v1_t), ProcessesUtilizationInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an ProcessesUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -16391,6 +17353,12 @@ cdef class GpuDynamicPstatesInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuDynamicPstatesInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuDynamicPstatesInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuDynamicPstatesInfo_t *>malloc(sizeof(nvmlGpuDynamicPstatesInfo_t))
@@ -16428,6 +17396,11 @@ cdef class GpuDynamicPstatesInfo:
             raise ValueError("This GpuDynamicPstatesInfo instance is read-only")
         self._ptr[0].flags = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an GpuDynamicPstatesInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuDynamicPstatesInfo_t), GpuDynamicPstatesInfo)
+
     @staticmethod
     def from_data(data):
         """Create an GpuDynamicPstatesInfo instance wrapping the given NumPy array.
@@ -16530,6 +17503,12 @@ cdef class VgpuProcessesUtilizationInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuProcessesUtilizationInfo_v1_t *>malloc(sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t))
@@ -16580,6 +17559,11 @@ cdef class VgpuProcessesUtilizationInfo_v1:
         self._ptr[0].vgpuProcessCount = len(arr)
         self._refs["vgpu_proc_util_array"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuProcessesUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t), VgpuProcessesUtilizationInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuProcessesUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -16674,6 +17658,12 @@ cdef class VgpuSchedulerParams:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerParams_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerParams_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerParams_t *>malloc(sizeof(nvmlVgpuSchedulerParams_t))
@@ -16710,6 +17700,11 @@ cdef class VgpuSchedulerParams:
         cdef _py_anon_pod3 val_ = val
         memcpy(<void *>&(self._ptr[0].vgpuSchedData), <void *>(val_._get_ptr()), sizeof(_anon_pod3) * 1)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerParams instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerParams_t), VgpuSchedulerParams)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerParams instance wrapping the given NumPy array.
@@ -16803,6 +17798,12 @@ cdef class VgpuSchedulerSetParams:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerSetParams_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerSetParams_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerSetParams_t *>malloc(sizeof(nvmlVgpuSchedulerSetParams_t))
@@ -16839,6 +17840,11 @@ cdef class VgpuSchedulerSetParams:
         cdef _py_anon_pod5 val_ = val
         memcpy(<void *>&(self._ptr[0].vgpuSchedData), <void *>(val_._get_ptr()), sizeof(_anon_pod5) * 1)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerSetParams instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerSetParams_t), VgpuSchedulerSetParams)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerSetParams instance wrapping the given NumPy array.
@@ -16938,6 +17944,12 @@ cdef class VgpuLicenseInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuLicenseInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuLicenseInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuLicenseInfo_t *>malloc(sizeof(nvmlVgpuLicenseInfo_t))
@@ -16984,6 +17996,11 @@ cdef class VgpuLicenseInfo:
             raise ValueError("This VgpuLicenseInfo instance is read-only")
         self._ptr[0].currentState = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuLicenseInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuLicenseInfo_t), VgpuLicenseInfo)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuLicenseInfo instance wrapping the given NumPy array.
@@ -17090,6 +18107,12 @@ cdef class GridLicensableFeature:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def feature_code(self):
         """Union[~_numpy.int32, int]: """
@@ -17169,6 +18192,11 @@ cdef class GridLicensableFeature:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an GridLicensableFeature instance with the memory from the given buffer."""
+        return GridLicensableFeature.from_data(_numpy.frombuffer(buffer, dtype=grid_licensable_feature_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an GridLicensableFeature instance wrapping the given NumPy array.
@@ -17270,6 +18298,12 @@ cdef class UnitFanSpeeds:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlUnitFanSpeeds_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlUnitFanSpeeds_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlUnitFanSpeeds_t *>malloc(sizeof(nvmlUnitFanSpeeds_t))
@@ -17307,6 +18341,11 @@ cdef class UnitFanSpeeds:
             raise ValueError("This UnitFanSpeeds instance is read-only")
         self._ptr[0].count = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an UnitFanSpeeds instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlUnitFanSpeeds_t), UnitFanSpeeds)
+
     @staticmethod
     def from_data(data):
         """Create an UnitFanSpeeds instance wrapping the given NumPy array.
@@ -17411,6 +18450,12 @@ cdef class VgpuPgpuMetadata:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuPgpuMetadata_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuPgpuMetadata_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuPgpuMetadata_t *>malloc(sizeof(nvmlVgpuPgpuMetadata_t))
@@ -17509,6 +18554,11 @@ cdef class VgpuPgpuMetadata:
         cdef char *ptr = buf
         memcpy(<void *>(self._ptr[0].opaqueData), <void *>ptr, 4)
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuPgpuMetadata instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuPgpuMetadata_t), VgpuPgpuMetadata)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuPgpuMetadata instance wrapping the given NumPy array.
@@ -17609,6 +18659,12 @@ cdef class GpuInstanceInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGpuInstanceInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGpuInstanceInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGpuInstanceInfo_t *>malloc(sizeof(nvmlGpuInstanceInfo_t))
@@ -17666,6 +18722,11 @@ cdef class GpuInstanceInfo:
             raise ValueError("This GpuInstanceInfo instance is read-only")
         self._ptr[0].profileId = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an GpuInstanceInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGpuInstanceInfo_t), GpuInstanceInfo)
+
     @staticmethod
     def from_data(data):
         """Create an GpuInstanceInfo instance wrapping the given NumPy array.
@@ -17767,6 +18828,12 @@ cdef class ComputeInstanceInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlComputeInstanceInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlComputeInstanceInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlComputeInstanceInfo_t *>malloc(sizeof(nvmlComputeInstanceInfo_t))
@@ -17835,6 +18902,11 @@ cdef class ComputeInstanceInfo:
             raise ValueError("This ComputeInstanceInfo instance is read-only")
         self._ptr[0].profileId = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an ComputeInstanceInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlComputeInstanceInfo_t), ComputeInstanceInfo)
+
     @staticmethod
     def from_data(data):
         """Create an ComputeInstanceInfo instance wrapping the given NumPy array.
@@ -17936,6 +19008,12 @@ cdef class EccSramUniqueUncorrectedErrorCounts_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlEccSramUniqueUncorrectedErrorCounts_v1_t *>malloc(sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t))
@@ -17975,6 +19053,11 @@ cdef class EccSramUniqueUncorrectedErrorCounts_v1:
         self._ptr[0].entryCount = len(arr)
         self._refs["entries"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an EccSramUniqueUncorrectedErrorCounts_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t), EccSramUniqueUncorrectedErrorCounts_v1)
+
     @staticmethod
     def from_data(data):
         """Create an EccSramUniqueUncorrectedErrorCounts_v1 instance wrapping the given NumPy array.
@@ -18074,6 +19157,12 @@ cdef class NvlinkFirmwareInfo:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvlinkFirmwareInfo_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvlinkFirmwareInfo_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvlinkFirmwareInfo_t *>malloc(sizeof(nvmlNvlinkFirmwareInfo_t))
@@ -18111,6 +19200,11 @@ cdef class NvlinkFirmwareInfo:
             raise ValueError("This NvlinkFirmwareInfo instance is read-only")
         self._ptr[0].numValidEntries = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an NvlinkFirmwareInfo instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvlinkFirmwareInfo_t), NvlinkFirmwareInfo)
+
     @staticmethod
     def from_data(data):
         """Create an NvlinkFirmwareInfo instance wrapping the given NumPy array.
@@ -18214,6 +19308,12 @@ cdef class VgpuInstancesUtilizationInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuInstancesUtilizationInfo_v1_t *>malloc(sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t))
@@ -18275,6 +19375,11 @@ cdef class VgpuInstancesUtilizationInfo_v1:
         self._ptr[0].vgpuInstanceCount = len(arr)
         self._refs["vgpu_util_array"] = arr
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuInstancesUtilizationInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t), VgpuInstancesUtilizationInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuInstancesUtilizationInfo_v1 instance wrapping the given NumPy array.
@@ -18379,6 +19484,12 @@ cdef class PRMCounter_v1:
             return False
         return bool((self_data == other._data).all())
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        cpython.PyObject_GetBuffer(self._data, buffer, flags)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        cpython.PyBuffer_Release(buffer)
+
     @property
     def counter_id(self):
         """Union[~_numpy.uint32, int]: Counter ID, one of nvmlPRMCounterId_t."""
@@ -18427,6 +19538,11 @@ cdef class PRMCounter_v1:
     def __setitem__(self, key, val):
         self._data[key] = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an PRMCounter_v1 instance with the memory from the given buffer."""
+        return PRMCounter_v1.from_data(_numpy.frombuffer(buffer, dtype=prm_counter_v1_dtype))
+
     @staticmethod
     def from_data(data):
         """Create an PRMCounter_v1 instance wrapping the given NumPy array.
@@ -18532,6 +19648,12 @@ cdef class VgpuSchedulerLog:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerLog_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerLog_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerLog_t *>malloc(sizeof(nvmlVgpuSchedulerLog_t))
@@ -18614,6 +19736,11 @@ cdef class VgpuSchedulerLog:
             raise ValueError("This VgpuSchedulerLog instance is read-only")
         self._ptr[0].entriesCount = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerLog instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerLog_t), VgpuSchedulerLog)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerLog instance wrapping the given NumPy array.
@@ -18713,6 +19840,12 @@ cdef class VgpuSchedulerGetState:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerGetState_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerGetState_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerGetState_t *>malloc(sizeof(nvmlVgpuSchedulerGetState_t))
@@ -18759,6 +19892,11 @@ cdef class VgpuSchedulerGetState:
             raise ValueError("This VgpuSchedulerGetState instance is read-only")
         self._ptr[0].arrMode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerGetState instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerGetState_t), VgpuSchedulerGetState)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerGetState instance wrapping the given NumPy array.
@@ -18860,6 +19998,12 @@ cdef class VgpuSchedulerStateInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerStateInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerStateInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerStateInfo_v1_t *>malloc(sizeof(nvmlVgpuSchedulerStateInfo_v1_t))
@@ -18928,6 +20072,11 @@ cdef class VgpuSchedulerStateInfo_v1:
             raise ValueError("This VgpuSchedulerStateInfo_v1 instance is read-only")
         self._ptr[0].arrMode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerStateInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerStateInfo_v1_t), VgpuSchedulerStateInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerStateInfo_v1 instance wrapping the given NumPy array.
@@ -19031,6 +20180,12 @@ cdef class VgpuSchedulerLogInfo_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerLogInfo_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerLogInfo_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerLogInfo_v1_t *>malloc(sizeof(nvmlVgpuSchedulerLogInfo_v1_t))
@@ -19124,6 +20279,11 @@ cdef class VgpuSchedulerLogInfo_v1:
             raise ValueError("This VgpuSchedulerLogInfo_v1 instance is read-only")
         self._ptr[0].entriesCount = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerLogInfo_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerLogInfo_v1_t), VgpuSchedulerLogInfo_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerLogInfo_v1 instance wrapping the given NumPy array.
@@ -19225,6 +20385,12 @@ cdef class VgpuSchedulerState_v1:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlVgpuSchedulerState_v1_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlVgpuSchedulerState_v1_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlVgpuSchedulerState_v1_t *>malloc(sizeof(nvmlVgpuSchedulerState_v1_t))
@@ -19293,6 +20459,11 @@ cdef class VgpuSchedulerState_v1:
             raise ValueError("This VgpuSchedulerState_v1 instance is read-only")
         self._ptr[0].enableARRMode = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an VgpuSchedulerState_v1 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlVgpuSchedulerState_v1_t), VgpuSchedulerState_v1)
+
     @staticmethod
     def from_data(data):
         """Create an VgpuSchedulerState_v1 instance wrapping the given NumPy array.
@@ -19392,6 +20563,12 @@ cdef class GridLicensableFeatures:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlGridLicensableFeatures_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlGridLicensableFeatures_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlGridLicensableFeatures_t *>malloc(sizeof(nvmlGridLicensableFeatures_t))
@@ -19432,6 +20609,11 @@ cdef class GridLicensableFeatures:
             raise ValueError("This GridLicensableFeatures instance is read-only")
         self._ptr[0].isGridLicenseSupported = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an GridLicensableFeatures instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlGridLicensableFeatures_t), GridLicensableFeatures)
+
     @staticmethod
     def from_data(data):
         """Create an GridLicensableFeatures instance wrapping the given NumPy array.
@@ -19531,6 +20713,12 @@ cdef class NvLinkInfo_v2:
         other_ = other
         return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlNvLinkInfo_v2_t)) == 0)
 
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        __getbuffer(self, buffer, <void *>self._ptr, sizeof(nvmlNvLinkInfo_v2_t), self._readonly)
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
     def __setitem__(self, key, val):
         if key == 0 and isinstance(val, _numpy.ndarray):
             self._ptr = <nvmlNvLinkInfo_v2_t *>malloc(sizeof(nvmlNvLinkInfo_v2_t))
@@ -19577,6 +20765,11 @@ cdef class NvLinkInfo_v2:
             raise ValueError("This NvLinkInfo_v2 instance is read-only")
         self._ptr[0].isNvleEnabled = val
 
+    @staticmethod
+    def from_buffer(buffer):
+        """Create an NvLinkInfo_v2 instance with the memory from the given buffer."""
+        return __from_buffer(buffer, sizeof(nvmlNvLinkInfo_v2_t), NvLinkInfo_v2)
+
     @staticmethod
     def from_data(data):
         """Create an NvLinkInfo_v2 instance wrapping the given NumPy array.
@@ -26227,4 +27420,4 @@ cpdef str vgpu_type_get_name(unsigned int vgpu_type_id):
     with nogil:
         __status__ = nvmlVgpuTypeGetName(<nvmlVgpuTypeId_t>vgpu_type_id, vgpu_type_name, <unsigned int*>size)
     check_status(__status__)
-    return cpython.PyUnicode_FromString(vgpu_type_name)
+    return cpython.PyUnicode_FromStringAndSize(vgpu_type_name, size[0])
diff --git a/cuda_bindings/cuda/bindings/nvrtc.pxd.in b/cuda_bindings/cuda/bindings/nvrtc.pxd.in
index fbda11a161..cb2b0c260a 100644
--- a/cuda_bindings/cuda/bindings/nvrtc.pxd.in
+++ b/cuda_bindings/cuda/bindings/nvrtc.pxd.in
@@ -1,7 +1,7 @@
-# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings.cynvrtc as cynvrtc
 
 include "_lib/utils.pxd"
diff --git a/cuda_bindings/cuda/bindings/nvrtc.pyx.in b/cuda_bindings/cuda/bindings/nvrtc.pyx.in
index 3586d33f7a..3cb0381b63 100644
--- a/cuda_bindings/cuda/bindings/nvrtc.pyx.in
+++ b/cuda_bindings/cuda/bindings/nvrtc.pyx.in
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0, generator version fd3f910. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 from typing import Any, Optional
 import cython
 import ctypes
diff --git a/cuda_bindings/cuda/bindings/nvvm.pxd b/cuda_bindings/cuda/bindings/nvvm.pxd
index d18c880860..fd8bbbdcf9 100644
--- a/cuda_bindings/cuda/bindings/nvvm.pxd
+++ b/cuda_bindings/cuda/bindings/nvvm.pxd
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 from libc.stdint cimport intptr_t
 
diff --git a/cuda_bindings/cuda/bindings/nvvm.pyx b/cuda_bindings/cuda/bindings/nvvm.pyx
index 2f55020235..81ca09754a 100644
--- a/cuda_bindings/cuda/bindings/nvvm.pyx
+++ b/cuda_bindings/cuda/bindings/nvvm.pyx
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 #
-# This code was automatically generated across versions from 12.0.1 to 13.1.1. Do not modify it directly.
+# This code was automatically generated across versions from 12.0.1 to 13.1.1, generator version 0.3.1.dev1322+g646ce84ec. Do not modify it directly.
 
 cimport cython  # NOQA
 
diff --git a/cuda_bindings/cuda/bindings/runtime.pxd.in b/cuda_bindings/cuda/bindings/runtime.pxd.in
index 6d88763abb..91ecd45b31 100644
--- a/cuda_bindings/cuda/bindings/runtime.pxd.in
+++ b/cuda_bindings/cuda/bindings/runtime.pxd.in
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 cimport cuda.bindings.cyruntime as cyruntime
 
 include "_lib/utils.pxd"
diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in
index f4473554eb..0fe497fb41 100644
--- a/cuda_bindings/cuda/bindings/runtime.pyx.in
+++ b/cuda_bindings/cuda/bindings/runtime.pyx.in
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-# This code was automatically generated with version 13.1.0, generator version c185cc3. Do not modify it directly.
+# This code was automatically generated with version 13.1.0, generator version 49a8141. Do not modify it directly.
 from typing import Any, Optional
 import cython
 import ctypes
@@ -41333,10 +41333,10 @@ def sizeof(objType):
     {{if True}}
     if objType == VdpOutputSurface:
         return sizeof(cyruntime.VdpOutputSurface){{endif}}
-    {{if 'cudaStreamAttrValue' in found_types}}
+    {{if True}}
     if objType == cudaStreamAttrValue:
         return sizeof(cyruntime.cudaStreamAttrValue){{endif}}
-    {{if 'cudaKernelNodeAttrValue' in found_types}}
+    {{if True}}
     if objType == cudaKernelNodeAttrValue:
         return sizeof(cyruntime.cudaKernelNodeAttrValue){{endif}}
     {{if True}}
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd b/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
index 5a73a57ee9..8dd0bbbeb1 100644
--- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
+++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pxd
@@ -6,4 +6,6 @@ from cuda.core._memory._memory_pool cimport _MemPool
 
 
 cdef class ManagedMemoryResource(_MemPool):
-    pass
+    cdef:
+        str _pref_loc_type
+        int _pref_loc_id
diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
index 64f523087c..4f24bd8d11 100644
--- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx
@@ -26,12 +26,35 @@ cdef class ManagedMemoryResourceOptions:
     Attributes
     ----------
     preferred_location : int | None, optional
-        The preferred device location for the managed memory.
-        Use a device ID (0, 1, 2, ...) for device preference, -1 for CPU/host,
-        or None to let the driver decide.
-        (Default to None)
+        A location identifier (device ordinal or NUMA node ID) whose
+        meaning depends on ``preferred_location_type``.
+        (Default to ``None``)
+
+    preferred_location_type : ``"device"`` | ``"host"`` | ``"host_numa"`` | None, optional
+        Controls how ``preferred_location`` is interpreted.
+
+        When set to ``None`` (the default), legacy behavior is used:
+        ``preferred_location`` is interpreted as a device ordinal,
+        ``-1`` for host, or ``None`` for no preference.
+
+        When set explicitly, the type determines both the kind of
+        preferred location and the valid values for
+        ``preferred_location``:
+
+        - ``"device"``: prefer a specific GPU. ``preferred_location``
+          must be a device ordinal (``>= 0``).
+        - ``"host"``: prefer host memory (OS-managed NUMA placement).
+          ``preferred_location`` must be ``None``.
+        - ``"host_numa"``: prefer a specific host NUMA node.
+          ``preferred_location`` must be a NUMA node ID (``>= 0``),
+          or ``None`` to derive the NUMA node from the current CUDA
+          device's ``host_numa_id`` attribute (requires an active
+          CUDA context).
+
+        (Default to ``None``)
     """
     preferred_location: int | None = None
+    preferred_location_type: str | None = None
 
 
 cdef class ManagedMemoryResource(_MemPool):
@@ -68,9 +91,26 @@ cdef class ManagedMemoryResource(_MemPool):
 
     @property
     def device_id(self) -> int:
-        """Return -1. Managed memory migrates automatically and is not tied to a specific device."""
+        """The preferred device ordinal, or -1 if the preferred location is not a device."""
+        if self._pref_loc_type == "device":
+            return self._pref_loc_id
         return -1
 
+    @property
+    def preferred_location(self) -> tuple | None:
+        """The preferred location for managed memory allocations.
+
+        Returns ``None`` if no preferred location is set (driver decides),
+        or a tuple ``(type, id)`` where *type* is one of ``"device"``,
+        ``"host"``, or ``"host_numa"``, and *id* is the device ordinal,
+        ``None`` (for ``"host"``), or the NUMA node ID, respectively.
+        """
+        if self._pref_loc_type is None:
+            return None
+        if self._pref_loc_type == "host":
+            return ("host", None)
+        return (self._pref_loc_type, self._pref_loc_id)
+
     @property
     def is_device_accessible(self) -> bool:
         """Return True. This memory resource provides device-accessible buffers."""
@@ -82,40 +122,121 @@ cdef class ManagedMemoryResource(_MemPool):
         return True
 
 
-cdef inline _MMR_init(ManagedMemoryResource self, options):
-    cdef ManagedMemoryResourceOptions opts = check_or_create_options(
-        ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
-        keep_none=True
-    )
-    cdef int location_id = -1
-    cdef object preferred_location = None
-    cdef cydriver.CUmemLocationType loc_type
-
-    if opts is not None:
-        preferred_location = opts.preferred_location
-        if preferred_location is not None:
-            location_id = preferred_location
+IF CUDA_CORE_BUILD_MAJOR >= 13:
+    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")
+
+
+    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
+        """Validate *opts* and resolve them into driver and stored values.
+
+        Returns a 4-tuple (raises ValueError/RuntimeError on bad input):
+            (CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)
+        """
+        cdef object pref_loc = opts.preferred_location if opts is not None else None
+        cdef object pref_type = opts.preferred_location_type if opts is not None else None
 
+        if pref_type is not None and pref_type not in _VALID_LOCATION_TYPES:
+            raise ValueError(
+                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
+                f"or None, got {pref_type!r}"
+            )
+
+        if pref_type is None:
+            # Legacy behavior: preferred_location alone selects the kind (None / -1 / ordinal)
+            if pref_loc is None:
+                return (
+                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
+                    -1, None, -1,  # no preference: stored type stays None
+                )
+            if pref_loc == -1:
+                return (
+                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
+                    -1, "host", -1,  # -1 is the legacy spelling of "host"
+                )
+            if pref_loc < 0:
+                raise ValueError(
+                    f"preferred_location must be a device ordinal (>= 0), -1 for "
+                    f"host, or None for no preference, got {pref_loc}"
+                )
+            return (
+                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
+                pref_loc, "device", pref_loc,
+            )
+
+        if pref_type == "device":
+            if pref_loc is None or pref_loc < 0:
+                raise ValueError(
+                    f"preferred_location must be a device ordinal (>= 0) when "
+                    f"preferred_location_type is 'device', got {pref_loc!r}"
+                )
+            return (
+                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
+                pref_loc, "device", pref_loc,
+            )
+
+        if pref_type == "host":
+            if pref_loc is not None:
+                raise ValueError(
+                    f"preferred_location must be None when "
+                    f"preferred_location_type is 'host', got {pref_loc!r}"
+                )
+            return (
+                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
+                -1, "host", -1,  # same result as the legacy -1 path above
+            )
+
+        # pref_type == "host_numa" (the only valid type left after the checks above)
+        if pref_loc is None:
+            from .._device import Device  # function-local import (presumably avoids an import cycle - confirm)
+            dev = Device()  # no ordinal passed; NOTE(review): expected to be the current device - confirm
+            numa_id = dev.properties.host_numa_id
+            if numa_id < 0:
+                raise RuntimeError(
+                    "Cannot determine host NUMA ID for the current CUDA device. "
+                    "The system may not support NUMA, or no CUDA context is "
+                    "active. Set preferred_location to an explicit NUMA node ID "
+                    "or call Device.set_current() first."
+                )
+            return (
+                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
+                numa_id, "host_numa", numa_id,
+            )
+        if pref_loc < 0:
+            raise ValueError(
+                f"preferred_location must be a NUMA node ID (>= 0) or None "
+                f"when preferred_location_type is 'host_numa', got {pref_loc}"
+            )
+        return (
+            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
+            pref_loc, "host_numa", pref_loc,
+        )
+
+
+cdef inline _MMR_init(ManagedMemoryResource self, options):
     IF CUDA_CORE_BUILD_MAJOR >= 13:
-        if preferred_location is None:
-            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE
-        elif location_id == -1:
-            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
-        else:
-            loc_type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
+        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
+            ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
+            keep_none=True
+        )
+        cdef cydriver.CUmemLocationType loc_type
+        cdef int loc_id
+
+        loc_type, loc_id, self._pref_loc_type, self._pref_loc_id = (
+            _resolve_preferred_location(opts)
+        )
 
         if opts is None:
             MP_init_current_pool(
                 self,
                 loc_type,
-                location_id,
+                loc_id,
                 cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
             )
         else:
             MP_init_create_pool(
                 self,
                 loc_type,
-                location_id,
+                loc_id,
                 cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                 False,
                 0,
diff --git a/cuda_core/cuda/core/_program.pxd b/cuda_core/cuda/core/_program.pxd
index 02d436d3f3..7a6717059b 100644
--- a/cuda_core/cuda/core/_program.pxd
+++ b/cuda_core/cuda/core/_program.pxd
@@ -16,3 +16,5 @@ cdef class Program:
         object _compile_lock  # Per-instance lock for compile-time mutation
         bint _use_libdevice      # Flag for libdevice loading
         bint _libdevice_added
+        bytes _nvrtc_code       # Source code for NVRTC retry (PCH auto-resize)
+        str _pch_status         # PCH creation outcome after compile
diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx
index 68c0476b09..0b1fa93279 100644
--- a/cuda_core/cuda/core/_program.pyx
+++ b/cuda_core/cuda/core/_program.pyx
@@ -105,6 +105,32 @@ cdef class Program:
         """
         return Program_compile(self, target_type, name_expressions, logs)
 
+    @property
+    def pch_status(self) -> str | None:
+        """PCH creation outcome from the most recent :meth:`compile` call.
+
+        Possible values:
+
+        * ``"created"`` — PCH file was written successfully.
+        * ``"not_attempted"`` — PCH creation was not attempted (e.g. the
+          compiler decided not to, or automatic PCH processing skipped it).
+        * ``"failed"`` — an error prevented PCH creation.
+        * ``None`` — PCH was not requested, the program has not been
+          compiled yet, the backend is not NVRTC (e.g. PTX or NVVM),
+          or the NVRTC bindings are too old to report status.
+
+        When ``create_pch`` is set in :class:`ProgramOptions` and the PCH
+        heap is too small, :meth:`compile` automatically resizes the heap
+        and retries, so ``"created"`` should be the common outcome.
+
+        .. note::
+
+           PCH is only supported for ``code_type="c++"`` programs that
+           use the NVRTC backend. For PTX and NVVM programs this property
+           always returns ``None``.
+        """
+        return self._pch_status
+
     @property
     def backend(self) -> str:
         """Return this Program instance's underlying backend."""
@@ -477,6 +503,8 @@ def _find_libdevice_path():
     return find_bitcode_lib("device")
 
 
+
+
 cdef inline bint _process_define_macro_inner(list options, object macro) except? -1:
     """Process a single define macro, returning True if successful."""
     if isinstance(macro, str):
@@ -548,6 +576,8 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
     self._use_libdevice = False
     self._libdevice_added = False
 
+    self._pch_status = None
+
     if code_type == "c++":
         assert_type(code, str)
         if options.extra_sources is not None:
@@ -562,6 +592,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
             HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcCreateProgram(
                 &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL))
         self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog)
+        self._nvrtc_code = code_bytes
         self._backend = "NVRTC"
         self._linker = None
 
@@ -649,9 +680,15 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
     return 0
 
 
-cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs):
-    """Compile using NVRTC backend and return ObjectCode."""
-    cdef cynvrtc.nvrtcProgram prog = as_cu(self._h_nvrtc)
+cdef object _nvrtc_compile_and_extract(
+    cynvrtc.nvrtcProgram prog, str target_type, object name_expressions,
+    object logs, list options_list, str name,
+):
+    """Run nvrtcCompileProgram on *prog* and extract the output.
+
+    This is the inner compile+extract loop, factored out so the PCH
+    auto-retry path can call it on a fresh program handle.
+    """
     cdef size_t output_size = 0
     cdef size_t logsize = 0
     cdef vector[const char*] options_vec
@@ -669,7 +706,6 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp
             HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcAddNameExpression(prog, name_ptr))
 
     # Build options array
-    options_list = self._options.as_bytes("nvrtc", target_type)
     options_vec.resize(len(options_list))
     for i in range(len(options_list)):
         options_vec[i] = <const char*>(<bytes>options_list[i])
@@ -716,7 +752,84 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp
                 HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcGetProgramLog(prog, data_ptr))
             logs.write(log.decode("utf-8", errors="backslashreplace"))
 
-    return ObjectCode._init(bytes(data), target_type, symbol_mapping=symbol_mapping, name=self._options.name)
+    return ObjectCode._init(bytes(data), target_type, symbol_mapping=symbol_mapping, name=name)
+
+
+cdef int _nvrtc_pch_apis_cached = -1  # -1 = unchecked; afterwards holds the hasattr() result (0 or 1)
+
+cdef bint _has_nvrtc_pch_apis():  # memoized: the hasattr() probe runs only while the cache is -1
+    global _nvrtc_pch_apis_cached
+    if _nvrtc_pch_apis_cached < 0:
+        _nvrtc_pch_apis_cached = hasattr(nvrtc, "nvrtcGetPCHCreateStatus")  # only this one symbol is probed
+    return _nvrtc_pch_apis_cached
+
+
+cdef str _PCH_STATUS_CREATED = "created"
+cdef str _PCH_STATUS_NOT_ATTEMPTED = "not_attempted"
+cdef str _PCH_STATUS_FAILED = "failed"
+
+
+cdef str _read_pch_status(cynvrtc.nvrtcProgram prog):
+    """Query nvrtcGetPCHCreateStatus; return a status string, or None if the PCH heap was exhausted."""
+    cdef cynvrtc.nvrtcResult err
+    with nogil:
+        err = cynvrtc.nvrtcGetPCHCreateStatus(prog)
+    if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS:
+        return _PCH_STATUS_CREATED
+    if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED:
+        return None  # sentinel: caller should auto-retry
+    if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED:
+        return _PCH_STATUS_NOT_ATTEMPTED
+    return _PCH_STATUS_FAILED  # every other result code is reported as "failed"
+
+
+cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs):
+    """Compile with the NVRTC backend, record ``_pch_status``, and return ObjectCode."""
+    cdef cynvrtc.nvrtcProgram prog = as_cu(self._h_nvrtc)
+    cdef list options_list = self._options.as_bytes("nvrtc", target_type)
+
+    result = _nvrtc_compile_and_extract(
+        prog, target_type, name_expressions, logs, options_list, self._options.name,
+    )
+
+    cdef bint pch_creation_possible = self._options.create_pch or self._options.pch
+    if not pch_creation_possible or not _has_nvrtc_pch_apis():
+        self._pch_status = None  # PCH not requested, or the bindings lack the status API
+        return result
+
+    try:
+        status = _read_pch_status(prog)
+    except RuntimeError as e:  # NOTE(review): presumably raised when the loaded libnvrtc lacks the symbol - confirm
+        raise RuntimeError(
+            "PCH was requested but the runtime libnvrtc does not support "
+            "PCH APIs. Update to CUDA toolkit 12.8 or newer."
+        ) from e
+
+    if status is not None:  # None is the heap-exhausted sentinel from _read_pch_status
+        self._pch_status = status
+        return result
+
+    # Heap exhausted — auto-resize and retry with a fresh program (the first result is discarded)
+    cdef size_t required = 0
+    with nogil:
+        HANDLE_RETURN_NVRTC(prog, cynvrtc.nvrtcGetPCHHeapSizeRequired(prog, &required))
+        HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcSetPCHHeapSize(required))  # NOTE(review): takes no program, so looks process-wide - confirm
+
+    cdef cynvrtc.nvrtcProgram retry_prog
+    cdef const char* code_ptr = <const char*>self._nvrtc_code
+    cdef const char* name_ptr = <const char*>self._options._name
+    with nogil:
+        HANDLE_RETURN_NVRTC(NULL, cynvrtc.nvrtcCreateProgram(
+            &retry_prog, code_ptr, name_ptr, 0, NULL, NULL))
+    self._h_nvrtc = create_nvrtc_program_handle(retry_prog)  # the stored handle now refers to the retry program
+
+    result = _nvrtc_compile_and_extract(  # NOTE(review): same logs object is passed again; the log may be written twice - confirm
+        retry_prog, target_type, name_expressions, logs, options_list, self._options.name,
+    )
+
+    status = _read_pch_status(retry_prog)
+    self._pch_status = status if status is not None else _PCH_STATUS_FAILED  # still exhausted after one retry -> "failed"
+    return result
 
 
 cdef object Program_compile_nvvm(Program self, str target_type, object logs):
diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx
index c6c25874c8..bada70c7b9 100644
--- a/cuda_core/cuda/core/_stream.pyx
+++ b/cuda_core/cuda/core/_stream.pyx
@@ -470,18 +470,14 @@ cdef Stream Stream_accept(arg, bint allow_stream_protocol=False):
         return <Stream>(arg)
     elif isinstance(arg, GraphBuilder):
         return <Stream>(arg.stream)
-    elif allow_stream_protocol:
-        try:
-            stream = Stream._init(arg)
-        except:
-            pass
-        else:
-            warnings.warn(
-                "Passing foreign stream objects to this function via the "
-                "stream protocol is deprecated. Convert the object explicitly "
-                "using Stream(obj) instead.",
-                stacklevel=2,
-                category=DeprecationWarning,
-            )
-            return <Stream>(stream)
+    elif allow_stream_protocol and hasattr(arg, "__cuda_stream__"):
+        stream = Stream._init(arg)
+        warnings.warn(
+            "Passing foreign stream objects to this function via the "
+            "stream protocol is deprecated. Convert the object explicitly "
+            "using Stream(obj) instead.",
+            stacklevel=2,
+            category=DeprecationWarning,
+        )
+        return <Stream>(stream)
     raise TypeError(f"Stream or GraphBuilder expected, got {type(arg).__name__}")
diff --git a/cuda_core/docs/source/release/0.6.0-notes.rst b/cuda_core/docs/source/release/0.6.0-notes.rst
index b7d6188cc2..654eb7641b 100644
--- a/cuda_core/docs/source/release/0.6.0-notes.rst
+++ b/cuda_core/docs/source/release/0.6.0-notes.rst
@@ -54,6 +54,11 @@ New features
 - Added CUDA version compatibility check at import time to detect mismatches between
   ``cuda.core`` and the installed ``cuda-bindings`` version.
 
+- ``Program.compile()`` now automatically resizes the NVRTC PCH heap and
+  retries when precompiled header creation fails due to heap exhaustion.
+  The ``pch_status`` property reports the PCH creation outcome
+  (``"created"``, ``"not_attempted"``, ``"failed"``, or ``None``).
+
 
 Fixes and enhancements
 ----------------------
diff --git a/cuda_core/docs/source/release/0.7.x-notes.rst b/cuda_core/docs/source/release/0.7.x-notes.rst
new file mode 100644
index 0000000000..032f5a7005
--- /dev/null
+++ b/cuda_core/docs/source/release/0.7.x-notes.rst
@@ -0,0 +1,57 @@
+.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+.. SPDX-License-Identifier: Apache-2.0
+
+.. currentmodule:: cuda.core
+
+``cuda.core`` 0.7.x Release Notes
+=================================
+
+
+Highlights
+----------
+
+None.
+
+
+Breaking Changes
+----------------
+
+None.
+
+
+New features
+------------
+
+- Added ``preferred_location_type`` option to :class:`ManagedMemoryResourceOptions`
+  for explicit control over the preferred location kind (``"device"``,
+  ``"host"``, or ``"host_numa"``). This enables NUMA-aware managed memory
+  pool placement. The existing ``preferred_location`` parameter retains full
+  backwards compatibility when ``preferred_location_type`` is not set.
+
+- Added :attr:`ManagedMemoryResource.preferred_location` property to query the
+  resolved preferred location of a managed memory pool. Returns ``None`` for no
+  preference, or a tuple such as ``("device", 0)``, ``("host", None)``, or
+  ``("host_numa", 3)``.
+
+- Added ``numa_id`` option to :class:`PinnedMemoryResourceOptions` for explicit
+  control over host NUMA node placement. When ``ipc_enabled=True`` and
+  ``numa_id`` is not set, the NUMA node is automatically derived from the
+  current CUDA device.
+
+- Added :attr:`PinnedMemoryResource.numa_id` property to query the host NUMA
+  node ID used for pool placement. Returns ``-1`` for OS-managed placement.
+
+
+New examples
+------------
+
+None.
+
+
+Fixes and enhancements
+----------------------
+
+- Fixed IPC-enabled pinned memory pools using a hardcoded NUMA node ID of ``0``
+  instead of the NUMA node closest to the active CUDA device. On multi-NUMA
+  systems where the device is attached to a non-zero host NUMA node, this could
+  cause pool creation or allocation failures. (:issue:`1603`)
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
index b08f435031..a2828a9274 100644
--- a/cuda_core/pyproject.toml
+++ b/cuda_core/pyproject.toml
@@ -57,12 +57,12 @@ cu13 = ["cuda-bindings[all]==13.*"]
 [dependency-groups]
 test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures"]
 ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"]
-test-cu12 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"]  # runtime headers needed by CuPy
-test-cu13 = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"]  # runtime headers needed by CuPy
+test-cu12 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"]  # runtime headers needed by CuPy
+test-cu13 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"]  # runtime headers needed by CuPy
 # free threaded build, cupy doesn't support free-threaded builds yet, so avoid installing it for now
 # TODO: cupy should support free threaded builds
-test-cu12-ft = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cuda-toolkit[cudart]==12.*"]
-test-cu13-ft = [ {include-group = "ml-dtypes" }, "cuda-core[test]", "cuda-toolkit[cudart]==13.*"]
+test-cu12-ft = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cuda-toolkit[cudart]==12.*"]
+test-cu13-ft = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cuda-toolkit[cudart]==13.*"]
 
 [project.urls]
 homepage = "https://nvidia.github.io/cuda-python/"
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 4e1500b491..df20d76aed 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -57,6 +57,12 @@ def skip_if_managed_memory_unsupported(device):
             pytest.skip("Device does not support managed memory pool operations")
     except AttributeError:
         pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
+    try:
+        ManagedMemoryResource()
+    except RuntimeError as e:
+        if "requires CUDA 13.0" in str(e):
+            pytest.skip("ManagedMemoryResource requires CUDA 13.0 or later")
+        raise
 
 
 def create_managed_memory_resource_or_skip(*args, **kwargs):
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
index 8933dcba09..0f63d9b9f8 100644
--- a/cuda_core/tests/test_memory.py
+++ b/cuda_core/tests/test_memory.py
@@ -998,6 +998,155 @@ def test_managed_memory_resource_with_options(init_cuda):
     src_buffer.close()
 
 
+def test_managed_memory_resource_preferred_location_default(init_cuda):
+    """preferred_location property returns None when no preference is set."""
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    mr = create_managed_memory_resource_or_skip()
+    assert mr.preferred_location is None
+
+
+def test_managed_memory_resource_preferred_location_device(init_cuda):
+    """preferred_location returns ("device", ordinal) for device preference."""
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    # Legacy style
+    opts = ManagedMemoryResourceOptions(preferred_location=device.device_id)
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("device", device.device_id)
+
+    # Explicit style
+    opts = ManagedMemoryResourceOptions(
+        preferred_location=device.device_id,
+        preferred_location_type="device",
+    )
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("device", device.device_id)
+
+
+def test_managed_memory_resource_preferred_location_host(init_cuda):
+    """preferred_location returns ("host", None) for host preference."""
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    # Legacy style
+    opts = ManagedMemoryResourceOptions(preferred_location=-1)
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("host", None)
+
+    # Explicit style
+    opts = ManagedMemoryResourceOptions(preferred_location_type="host")
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("host", None)
+
+
+def test_managed_memory_resource_preferred_location_host_numa(init_cuda):
+    """preferred_location returns ("host_numa", id) for NUMA preference."""
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    numa_id = device.properties.host_numa_id
+    if numa_id < 0:
+        pytest.skip("System does not support NUMA")
+
+    # Auto-resolved from current device
+    opts = ManagedMemoryResourceOptions(preferred_location_type="host_numa")
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("host_numa", numa_id)
+
+    # Explicit NUMA node ID
+    opts = ManagedMemoryResourceOptions(
+        preferred_location=numa_id,
+        preferred_location_type="host_numa",
+    )
+    mr = create_managed_memory_resource_or_skip(opts)
+    assert mr.preferred_location == ("host_numa", numa_id)
+
+
+def test_managed_memory_resource_preferred_location_validation(init_cuda):
+    """Invalid preferred_location combinations raise errors."""
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    # Invalid preferred_location_type
+    with pytest.raises(ValueError, match="preferred_location_type must be one of"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location_type="invalid",
+            )
+        )
+
+    # "device" requires a non-negative int
+    with pytest.raises(ValueError, match="must be a device ordinal"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location_type="device",
+            )
+        )
+    with pytest.raises(ValueError, match="must be a device ordinal"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location=-1,
+                preferred_location_type="device",
+            )
+        )
+
+    # "host" requires preferred_location=None
+    with pytest.raises(ValueError, match="must be None"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location=0,
+                preferred_location_type="host",
+            )
+        )
+
+    # "host_numa" rejects negative IDs
+    with pytest.raises(ValueError, match="must be a NUMA node ID"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location=-1,
+                preferred_location_type="host_numa",
+            )
+        )
+
+    # Legacy mode rejects invalid negative values
+    with pytest.raises(ValueError, match="preferred_location must be"):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location=-2,
+            )
+        )
+
+
+def test_managed_memory_resource_host_numa_auto_resolve_failure(init_cuda):
+    """host_numa with preferred_location=None raises RuntimeError when the NUMA ID is negative."""
+    from unittest.mock import MagicMock, patch
+
+    device = Device()
+    skip_if_managed_memory_unsupported(device)
+    device.set_current()
+
+    mock_dev = MagicMock()
+    mock_dev.properties.host_numa_id = -1  # a negative ID is what the resolver rejects
+
+    with (
+        patch("cuda.core._device.Device", return_value=mock_dev),  # resolver imports Device at call time, so the patch is seen
+        pytest.raises(RuntimeError, match="Cannot determine host NUMA ID"),
+    ):
+        ManagedMemoryResource(
+            ManagedMemoryResourceOptions(
+                preferred_location_type="host_numa",
+            )
+        )
+
+
 def test_mempool_ipc_errors(mempool_device):
     """Test error cases when IPC operations are disabled."""
     device = mempool_device
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index edf249eb60..0005777b52 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -57,6 +57,22 @@ def _get_nvrtc_version_for_tests():
         return None
 
 
+def _has_nvrtc_pch_apis_for_tests():
+    required = (  # attribute names that must all be present on the nvrtc module
+        "nvrtcGetPCHHeapSize",
+        "nvrtcSetPCHHeapSize",
+        "nvrtcGetPCHCreateStatus",
+        "nvrtcGetPCHHeapSizeRequired",
+    )
+    return all(hasattr(nvrtc, name) for name in required)  # True only if none are missing
+
+
+nvrtc_pch_available = pytest.mark.skipif(  # skip if NVRTC version is falsy/< 12800 or PCH APIs are missing
+    (_get_nvrtc_version_for_tests() or 0) < 12800 or not _has_nvrtc_pch_apis_for_tests(),
+    reason="PCH runtime APIs require NVRTC >= 12.8 bindings",
+)
+
+
 _libnvvm_version = None
 _libnvvm_version_attempted = False
 
@@ -316,6 +332,25 @@ def test_cpp_program_with_pch_options(init_cuda, tmp_path):
         program.close()
 
 
+@nvrtc_pch_available
+def test_cpp_program_pch_auto_creates(init_cuda, tmp_path):
+    code = 'extern "C" __global__ void my_kernel() {}'
+    pch_path = str(tmp_path / "test.pch")  # PCH file path under the tmp_path fixture directory
+    program = Program(code, "c++", ProgramOptions(create_pch=pch_path))
+    assert program.pch_status is None  # not compiled yet
+    program.compile("ptx")
+    assert program.pch_status in ("created", "not_attempted", "failed")  # any of these three is accepted
+    program.close()
+
+
+def test_cpp_program_pch_status_none_without_pch(init_cuda):
+    code = 'extern "C" __global__ void my_kernel() {}'
+    program = Program(code, "c++")  # no ProgramOptions passed, so create_pch is never set here
+    program.compile("ptx")
+    assert program.pch_status is None  # status stays None even after compiling
+    program.close()
+
+
 options = [
     ProgramOptions(max_register_count=32),
     ProgramOptions(debug=True),
diff --git a/pytest.ini b/pytest.ini
index 0543760cd7..978e659bf0 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -19,3 +19,4 @@ markers =
     core: tests for cuda_core
     cython: cython tests
     smoke: meta-level smoke tests
+    flaky: mark test as flaky (provided by pytest-rerunfailures)

From c7f85ff825ec3dd7524a2ac0baebd2d872ddf00d Mon Sep 17 00:00:00 2001
From: Andy Jost <ajost@nvidia.com>
Date: Wed, 4 Mar 2026 09:55:45 -0800
Subject: [PATCH 4/4] Remove redundant Python-side peer access cleanup; fix
 peer access tests

- Remove __dealloc__ and close() override from DeviceMemoryResource
  that cleared peer access before destruction. The C++ RAII deleter
  already handles this for owned pools (nvbug 5698116 workaround).
  For non-owned pools (default device pool), clearing peer access
  on handle disposal was incorrect behavior.

- Update peer access tests to use owned pools (DeviceMemoryResourceOptions())
  instead of default pools. Default pools are shared and may have stale
  peer access state from prior tests, causing test failures.

Made-with: Cursor
---
 .../cuda/core/_memory/_device_memory_resource.pyx  | 14 --------------
 cuda_core/tests/test_memory_peer_access.py         | 12 ++++++++----
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
index 09aa482234..1299f1bd57 100644
--- a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
+++ b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
@@ -135,20 +135,6 @@ cdef class DeviceMemoryResource(_MemPool):
     def __init__(self, device_id: Device | int, options=None):
         _DMR_init(self, device_id, options)
 
-    def __dealloc__(self):
-        try:
-            self.close()
-        except Exception:
-            pass
-
-    def close(self):
-        """Close the memory resource, revoking peer access before destruction."""
-        # nvbug 5698116: clear peer access before pool destruction; also
-        # needed for non-owned (default) pools to undo modifications.
-        if self._peer_accessible_by:
-            _DMR_set_peer_accessible_by(self, [])
-        super().close()
-
     def __reduce__(self):
         return DeviceMemoryResource.from_registry, (self.uuid,)
 
diff --git a/cuda_core/tests/test_memory_peer_access.py b/cuda_core/tests/test_memory_peer_access.py
index bcae9576da..99426391db 100644
--- a/cuda_core/tests/test_memory_peer_access.py
+++ b/cuda_core/tests/test_memory_peer_access.py
@@ -3,7 +3,7 @@
 
 import cuda.core
 import pytest
-from cuda.core import DeviceMemoryResource
+from cuda.core import DeviceMemoryResource, DeviceMemoryResourceOptions
 from cuda.core._utils.cuda_utils import CUDAError
 from helpers.buffers import PatternGen, compare_buffer_to_constant, make_scratch_buffer
 
@@ -16,7 +16,8 @@ def test_peer_access_basic(mempool_device_x2):
     zero_on_dev0 = make_scratch_buffer(dev0, 0, NBYTES)
     one_on_dev0 = make_scratch_buffer(dev0, 1, NBYTES)
     stream_on_dev0 = dev0.create_stream()
-    dmr_on_dev1 = DeviceMemoryResource(dev1)
+    # Use owned pool to ensure clean initial state (no stale peer access).
+    dmr_on_dev1 = DeviceMemoryResource(dev1, DeviceMemoryResourceOptions())
     buf_on_dev1 = dmr_on_dev1.allocate(NBYTES)
 
     # No access at first.
@@ -51,7 +52,8 @@ def test_peer_access_property_x2(mempool_device_x2):
     # The peer access list is a sorted tuple and always excludes the self
     # device.
     dev0, dev1 = mempool_device_x2
-    dmr = DeviceMemoryResource(dev0)
+    # Use owned pool to ensure clean initial state (no stale peer access).
+    dmr = DeviceMemoryResource(dev0, DeviceMemoryResourceOptions())
 
     def check(expected):
         assert isinstance(dmr.peer_accessible_by, tuple)
@@ -97,7 +99,9 @@ def test_peer_access_transitions(mempool_device_x3):
     # Allocate per-device resources.
     streams = [dev.create_stream() for dev in devs]
     pgens = [PatternGen(devs[i], NBYTES, streams[i]) for i in range(3)]
-    dmrs = [DeviceMemoryResource(dev) for dev in devs]
+    # Use owned pools (with options) to ensure clean initial state.
+    # Default pools are shared and may have stale peer access from prior tests.
+    dmrs = [DeviceMemoryResource(dev, DeviceMemoryResourceOptions()) for dev in devs]
     bufs = [dmr.allocate(NBYTES) for dmr in dmrs]
 
     def verify_state(state, pattern_seed):