Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions exir/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ class TensorDataLocation(IntEnum):
EXTERNAL = 1


class DeviceType(IntEnum):
    """Enum of device kinds a tensor can reside on or be allocated to.

    These integer values are serialized into PTE files, so existing
    members must never be renumbered — only appended.

    NOTE(review): this docstring claims the enum is "not directly mapped"
    to torch's DeviceType, while program.fbs says it "follows PyTorch
    DeviceType convention" — confirm which statement is authoritative.
    Check program.fbs for the serialized form of this enum.
    """

    # Host (default) memory.
    CPU = 0
    # NVIDIA GPU memory.
    CUDA = 1


@dataclass
class ExtraTensorInfo:
"""
Expand All @@ -57,6 +68,12 @@ class ExtraTensorInfo:
mutable_data_segments_idx: int = 0
fully_qualified_name: Optional[str] = None
location: TensorDataLocation = TensorDataLocation.SEGMENT
# Device type where this tensor resides or should be allocated.
# Defaults to CPU for backward compatibility.
device_type: DeviceType = DeviceType.CPU
# Device index for multi-device scenarios (e.g., cuda:0, cuda:1).
# A value of -1 indicates the default device.
device_index: int = -1
Comment thread
lucylq marked this conversation as resolved.
Outdated


@dataclass
Expand Down Expand Up @@ -261,6 +278,26 @@ class Operator:
overload: str


@dataclass
class NonConstBufferDevice:
    """Sparse device-placement record for one non-constant memory buffer.

    Only buffers that live somewhere other than CPU get an entry; any
    buffer absent from ExecutionPlan.non_const_buffer_device is assumed
    to be CPU-resident, which keeps the serialized program small when
    most buffers stay on CPU. Check program.fbs for the serialized form.
    """

    # Position in ExecutionPlan.non_const_buffer_sizes that this record
    # describes.
    buffer_index: int
    # Target device kind; defaulting to CPU keeps older PTE files
    # readable without changes.
    device_type: DeviceType = DeviceType.CPU
    # Which device of that kind (e.g. cuda:0 -> 0, cuda:1 -> 1); -1 means
    # "use the runtime's default device".
    device_index: int = -1


@dataclass
class ExecutionPlan:
name: str
Expand All @@ -276,6 +313,12 @@ class ExecutionPlan:
# Runtime should use len(constant_buffer) as the ground truth of
# the constant memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: List[int]
# [Optional] Sparse device placement information for non-constant buffers.
# Only buffers that are NOT on CPU need to be listed here. Each entry
# specifies a buffer_index (into non_const_buffer_sizes) and its device.
# Buffers not listed here default to CPU, saving binary size when most
# buffers are on CPU.
non_const_buffer_device: Optional[List[NonConstBufferDevice]] = None


@dataclass
Expand Down
53 changes: 53 additions & 0 deletions schema/program.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,25 @@ enum TensorDataLocation : byte {
EXTERNAL = 1,
}

// Device type enum indicating where a tensor resides or should be allocated.
// Follows PyTorch DeviceType convention for compatibility.
// NOTE(review): the Python mirror in exir/schema.py says this enum is "not
// directly mapped" to torch's DeviceType — confirm which comment is
// authoritative and align the two.
// These values are serialized into PTE files; per flatbuffers schema-evolution
// rules, never renumber or remove existing entries — only append new ones.
enum DeviceType : byte {
  CPU = 0,
  CUDA = 1,
  // Reserve slots for future device types following PyTorch convention:
  // MKLDNN = 2,
  // OPENGL = 3,
  // OPENCL = 4,
  // IDEEP = 5,
  // HIP = 6,
  // FPGA = 7,
  // MAIA = 8,
  // XLA = 9,
  // MPS = 10,
  // XPU = 11,
  // PrivateUse1 = 12,
}

// Table to put additional information about tensors in that is not applicable
// to the vast majority of tensors in the vast majority of programs.
table ExtraTensorInfo {
Expand All @@ -79,6 +98,15 @@ table ExtraTensorInfo {
// must be non-empty, and is used as a key to find the tensor's external
// data. Tensor.data_buffer_idx is ignored.
location: TensorDataLocation;

// [Optional] The device type where this tensor resides or should be allocated.
// Defaults to CPU for backward compatibility with existing PTE files.
device_type: DeviceType = CPU;

// [Optional] The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
// A value of -1 indicates the default device. Defaults to -1 for backward
// compatibility.
device_index: byte = -1;
}

table Tensor {
Expand Down Expand Up @@ -386,6 +414,13 @@ table ExecutionPlan {
// constants memory buffer size, and ignore non_const_buffer_sizes[0].
non_const_buffer_sizes: [int64];

// [Optional] Sparse device placement information for non-constant buffers.
// Only buffers that are NOT on CPU need to be listed here. Each entry
// specifies a buffer_index (into non_const_buffer_sizes) and its device.
// Buffers not listed here default to CPU, saving binary size when most
// buffers are on CPU.
non_const_buffer_device: [NonConstBufferDevice];

}

// Constant tensor data stored directly in the flatbuffer.
Expand All @@ -406,6 +441,24 @@ table BackendDelegateInlineData {
data: [ubyte] (force_align: 16); // @executorch-delegate-alignment
}

// Device placement information for a non-constant memory buffer.
// This is a sparse representation: only buffers that are NOT on CPU need entries.
// Buffers not listed in ExecutionPlan.non_const_buffer_device default to CPU.
table NonConstBufferDevice {
  // Index into ExecutionPlan.non_const_buffer_sizes identifying which buffer
  // this entry applies to.
  // NOTE(review): non_const_buffer_sizes[0] is documented elsewhere as the
  // (ignored) constant-buffer slot — confirm whether buffer_index == 0 is a
  // legal value here.
  buffer_index: uint32;

  // The device type where this buffer should be allocated.
  // Defaults to CPU for backward compatibility with existing PTE files.
  // (Flatbuffers omits default-valued scalars from the wire format, but
  // CPU buffers should simply not be listed at all.)
  device_type: DeviceType = CPU;

  // The device index for multi-device scenarios (e.g., cuda:0, cuda:1).
  // A value of -1 indicates the default device. Defaults to -1 for backward
  // compatibility.
  // `byte` is a signed int8, so representable indices are -128..127.
  device_index: byte = -1;
}

// Describes a contiguous piece of data that lives outside of the flatbuffer data,
// typically appended afterwards in the file. The "extended header" in the file,
// when present, points to the segment base offset.
Expand Down
Loading