Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions exir/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ class TensorDataLocation(IntEnum):
EXTERNAL = 1


class DeviceType(IntEnum):
    """Enum of device kinds a tensor can reside on or be allocated to.

    These integer values are serialized into PTE files, so existing
    members must never be renumbered — only appended.

    NOTE(review): this docstring claims the enum is "not directly mapped"
    to torch's DeviceType, while program.fbs says it "follows PyTorch
    DeviceType convention" — confirm which statement is authoritative.
    Check program.fbs for the serialized form of this enum.
    """

    # Host (default) memory.
    CPU = 0
    # NVIDIA GPU memory.
    CUDA = 1


@dataclass
class ExtraTensorInfo:
"""
Expand All @@ -57,6 +68,12 @@ class ExtraTensorInfo:
mutable_data_segments_idx: int = 0
fully_qualified_name: Optional[str] = None
location: TensorDataLocation = TensorDataLocation.SEGMENT
# Device type where this tensor resides or should be allocated.
# Defaults to CPU for backward compatibility.
device_type: DeviceType = DeviceType.CPU
# Device index for multi-device scenarios (e.g., cuda:0, cuda:1).
# A value of -1 indicates the default device.
device_index: int = -1
Comment thread
lucylq marked this conversation as resolved.
Outdated


@dataclass
Expand Down Expand Up @@ -261,6 +278,26 @@ class Operator:
overload: str


@dataclass
class NonConstBufferDevice:
    """Sparse device-placement record for one non-constant memory buffer.

    Only buffers that live somewhere other than CPU get an entry; any
    buffer absent from ExecutionPlan.non_const_buffer_device is assumed
    to be CPU-resident, which keeps the serialized program small when
    most buffers stay on CPU. Check program.fbs for the serialized form.
    """

    # Position in ExecutionPlan.non_const_buffer_sizes that this record
    # describes.
    buffer_index: int
    # Target device kind; defaulting to CPU keeps older PTE files
    # readable without changes.
    device_type: DeviceType = DeviceType.CPU
    # Which device of that kind (e.g. cuda:0 -> 0, cuda:1 -> 1); -1 means
    # "use the runtime's default device".
    device_index: int = -1


@dataclass
class ExecutionPlan:
name: str
Expand All @@ -276,6 +313,12 @@ class ExecutionPlan:
# Runtime should use len(constant_buffer) as the ground truth of
# the constant memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: List[int]
# [Optional] Sparse device placement information for non-constant buffers.
# Only buffers that are NOT on CPU need to be listed here. Each entry
# specifies a buffer_index (into non_const_buffer_sizes) and its device.
# Buffers not listed here default to CPU, saving binary size when most
# buffers are on CPU.
non_const_buffer_device: Optional[List[NonConstBufferDevice]] = None


@dataclass
Expand Down
53 changes: 53 additions & 0 deletions schema/program.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,25 @@ enum TensorDataLocation : byte {
EXTERNAL = 1,
}

// Device type enum indicating where a tensor resides or should be allocated.
// Follows PyTorch DeviceType convention for compatibility.
// NOTE(review): the Python mirror in exir/schema.py says this enum is "not
// directly mapped" to torch's DeviceType — confirm which comment is
// authoritative and align the two.
// These values are serialized into PTE files; per flatbuffers schema-evolution
// rules, never renumber or remove existing entries — only append new ones.
enum DeviceType : byte {
  CPU = 0,
  CUDA = 1,
  // Reserve slots for future device types following PyTorch convention:
  // MKLDNN = 2,
  // OPENGL = 3,
  // OPENCL = 4,
  // IDEEP = 5,
  // HIP = 6,
  // FPGA = 7,
  // MAIA = 8,
  // XLA = 9,
  // MPS = 10,
  // XPU = 11,
  // PrivateUse1 = 12,
}

// Table to put additional information about tensors in that is not applicable
// to the vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
Expand All @@ -79,6 +98,15 @@ table ExtraTensorInfo {
// must be non-empty, and is used as a key to find the tensor's external
// data. Tensor.data_buffer_idx is ignored.
location: TensorDataLocation;

// [Optional] The device type where this tensor resides or should be allocated.
// Defaults to CPU for backward compatibility with existing PTE files.
device_type: DeviceType = CPU;

// [Optional] The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
// A value of -1 indicates the default device. Defaults to -1 for backward
// compatibility.
device_index: byte = -1;
}

table Tensor {
Expand Down Expand Up @@ -386,6 +414,13 @@ table ExecutionPlan {
// constants memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: [int64];

// [Optional] Sparse device placement information for non-constant buffers.
// Only buffers that are NOT on CPU need to be listed here. Each entry
// specifies a buffer_index (into non_const_buffer_sizes) and its device.
// Buffers not listed here default to CPU, saving binary size when most
// buffers are on CPU.
non_const_buffer_device: [NonConstBufferDevice];

}

// Constant tensor data stored directly in the flatbuffer.
Expand All @@ -406,6 +441,24 @@ table BackendDelegateInlineData {
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}

// Device placement information for a non-constant memory buffer.
// This is a sparse representation: only buffers that are NOT on CPU need entries.
// Buffers not listed in ExecutionPlan.non_const_buffer_device default to CPU.
table NonConstBufferDevice {
  // Index into ExecutionPlan.non_const_buffer_sizes identifying which buffer
  // this entry applies to.
  // NOTE(review): non_const_buffer_sizes[0] is documented elsewhere as the
  // (ignored) constant-buffer slot — confirm whether buffer_index == 0 is a
  // legal value here.
  buffer_index: uint32;

  // The device type where this buffer should be allocated.
  // Defaults to CPU for backward compatibility with existing PTE files.
  // (Flatbuffers omits default-valued scalars from the wire format, but
  // CPU buffers should simply not be listed at all.)
  device_type: DeviceType = CPU;

  // The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
  // A value of -1 indicates the default device. Defaults to -1 for backward
  // compatibility.
  // `byte` is a signed int8, so representable indices are -128..127.
  device_index: byte = -1;
}

// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
Expand Down
Loading