Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions gpu/mse.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
# DEALINGS IN THE SOFTWARE.
#

import atexit
from logging import debug

from utils import NiceStruct

from .mnoc import GpuMnoc

from logging import debug
import atexit

class MseHeader(NiceStruct):
_fields_ = [
Expand Down Expand Up @@ -107,7 +109,7 @@ def send_cmd(self, cmd_class, cmd_opcode, cmd_data, reset=False):
mse_header.ssid = 9
mse_header.dsid = 4

data_to_send = mse_header.to_int_array() + cmd_data
data_to_send = mse_header.to_int_list() + cmd_data

self.mnoc.send_data(data_to_send)

Expand All @@ -118,7 +120,7 @@ def process_incoming(self):

while True:
resp_data = self.mnoc.receive_data()
mse_header.from_int_array(resp_data[:4])
mse_header.from_ints(resp_data[:4])
if mse_header.is_response:
return resp_data[4:]
else:
Expand Down Expand Up @@ -166,6 +168,5 @@ def goodbye(self):
def get_platform_info(self):
platform_info_raw = self.send_cmd(2, 0x30, [])
platform_info = GetPlatformInfoRsp()
platform_info.from_int_array(platform_info_raw)
platform_info.from_ints(platform_info_raw)
return platform_info

10 changes: 5 additions & 5 deletions nvidia_gpu_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#

from __future__ import print_function
import array
import collections
import time
import sys
Expand All @@ -32,7 +33,7 @@
from pathlib import Path

from utils import platform_config
from utils import data_from_int, ints_from_bytearray, read_ints_from_path
from utils import data_from_int, array_view_from_bytearray, read_ints_from_path
from utils import formatted_tuple_from_data
from gpu.defines import *
from pci.defines import *
Expand Down Expand Up @@ -3040,14 +3041,13 @@ def is_driver_loaded(self):
return False

def dump_bar0(self):
bar0_data = bytearray()
bar0_data_array = array.array('I')
for offset in range(0, self.bar0_size, 4):
if offset % (128 * 1024) == 0:
debug("Dumped %d bytes so far", offset)
data = self.bar0.read32(offset)
bar0_data.extend(data_from_int(data, 4))

return bar0_data
bar0_data_array.append(data)
return memoryview(bar0_data_array.tobytes()).toreadonly()

def flr_resettable_scratch(self):
    """Return the constant 0xdfe0.

    NOTE(review): presumably the offset of a scratch register that is
    cleared by a function-level reset -- confirm against the callers.
    """
    scratch = 0xdfe0
    return scratch
Expand Down
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
markers =
    performance: Lightweight throughput checks for core utils
102 changes: 102 additions & 0 deletions tests/test_ints_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import array
import struct
import time
from collections import namedtuple

import pytest

from utils.formatted_tuple import FormattedTuple
from utils.ints_to_bytes import (
array_view_from_bytearray,
bytearray_view_from_ints,
data_from_int,
int_from_data,
ints_from_data,
)
from utils.nice_struct import NiceStruct


class DemoStruct(NiceStruct):
    # Minimal NiceStruct subclass for the tests in this file: two fields,
    # "a" and "b", each declared with the format code "I".
    name = "DemoStruct"
    _fields_ = [("a", "I"), ("b", "I")]


class DemoFormattedTuple(FormattedTuple):
    # Minimal FormattedTuple subclass for the tests in this file: a single
    # field named "value" packed with the struct format "<I".
    namedtuple = namedtuple("DemoTuple", ("value",))
    struct = struct.Struct("<I")


def test_ints_from_data_roundtrip():
    """Decode 32 bytes as 4-byte little-endian ints, then encode them back."""
    payload = bytes(range(32))
    decoded = ints_from_data(payload, 4)

    # Build the expected values independently with int.from_bytes.
    expected = []
    for start in range(0, len(payload), 4):
        expected.append(int.from_bytes(payload[start : start + 4], "little"))
    assert decoded == expected

    # Re-encoding every value must reproduce the original byte string.
    reencoded = bytearray()
    for word in decoded:
        reencoded.extend(data_from_int(word, 4))
    assert reencoded == payload


def test_ints_from_data_rejects_partial_chunks():
    """Three bytes cannot be split into 2-byte ints: expect struct.error."""
    odd_length = b"\x01\x02\x03"
    with pytest.raises(struct.error):
        ints_from_data(odd_length, 2)


def test_int_from_data_validates_length():
    """int_from_data decodes exactly ``size`` bytes and rejects shorter input."""
    decoded = int_from_data(b"\x01\x02\x03\x04", 4)
    assert decoded == 0x04030201

    too_short = b"\x01\x02"
    with pytest.raises(struct.error):
        int_from_data(too_short, 4)


def test_int_helpers_reject_unknown_sizes():
    """Both helpers must raise AssertionError for an unsupported size of 3."""
    unsupported_size = 3
    with pytest.raises(AssertionError):
        ints_from_data(b"\x01\x02\x03", unsupported_size)
    with pytest.raises(AssertionError):
        data_from_int(0x01, unsupported_size)


def test_bytearray_view_from_int_array_accepts_iterables():
    """bytearray_view_from_ints accepts a tuple and a generator alike."""
    words = (0x01020304, 0x05060708)
    # Least-significant byte of each word comes first.
    expected_bytes = [4, 3, 2, 1, 8, 7, 6, 5]

    tuple_view = bytearray_view_from_ints(words)
    assert isinstance(tuple_view, memoryview)
    assert tuple_view.readonly
    assert tuple_view.tolist() == expected_bytes

    # A one-shot generator must produce the same bytes as the tuple.
    word_gen = (w for w in words)
    generator_view = bytearray_view_from_ints(word_gen)
    assert generator_view.tolist() == expected_bytes


def test_array_view_from_bytearray_returns_int_view():
    """array_view_from_bytearray yields a read-only memoryview of the ints."""
    source = array.array("I", [0x01020304, 0x05060708])
    raw = source.tobytes()

    view = array_view_from_bytearray(raw)

    assert isinstance(view, memoryview)
    assert view.readonly
    # Iterating the view must give back the values the bytes were built from.
    assert list(view) == list(source)


def test_nice_struct_from_int_array_with_non_list():
    """NiceStruct.from_ints accepts a tuple and a generator, not only a list."""
    values = (0x01020304, 0x05060708)
    demo = DemoStruct()

    # Tuple input.
    demo.from_ints(values)
    assert demo.a == values[0]
    assert demo.b == values[1]

    # Generator input must populate the same fields.
    demo.from_ints(v for v in values)
    assert demo.a == values[0]
    assert demo.b == values[1]

    # Converting back gives a plain list holding the same values.
    expected_list = list(values)
    assert demo.to_int_list() == expected_list


def test_formatted_tuple_make_accepts_sequence():
    """FormattedTuple._make accepts a list of ints and a bytearray."""
    # Both inputs carry the same four bytes, so they decode to the same value.
    for raw in ([1, 2, 3, 4], bytearray(b"\x01\x02\x03\x04")):
        made = DemoFormattedTuple._make(raw)
        assert made.value == 0x04030201

    # Two bytes are too few for the "<I" format.
    truncated = b"\x01\x02"
    with pytest.raises(struct.error):
        DemoFormattedTuple._make(truncated)
138 changes: 138 additions & 0 deletions tests/test_ints_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import struct
import time

import pytest

# New, optimized implementations from the codebase
from utils.ints_to_bytes import bytearray_view_from_ints as bytearray_view_from_ints_new
from utils.ints_to_bytes import data_from_int as data_from_int_new
from utils.ints_to_bytes import int_from_data as int_from_data_new
from utils.ints_to_bytes import ints_from_data as ints_from_data_new

# --- Original implementations (pre-optimization) for comparison ---


def _struct_fmt_original(size):
if size == 1:
return "B"
elif size == 2:
return "=H"
elif size == 4:
return "=I"
elif size == 8:
return "=Q"
else:
assert 0, f"Unhandled size {size}"


def ints_from_data_original(data, size):
    """Reference (pre-optimization) decoder used as the benchmark baseline.

    Splits ``data`` into consecutive ``size``-byte chunks and unpacks each
    chunk with ``struct.unpack`` using the format returned by
    ``_struct_fmt_original(size)``.

    Args:
        data: Bytes-like input; it is copied with ``bytes(data)`` first.
        size: Chunk width in bytes, as accepted by ``_struct_fmt_original``.

    Returns:
        A list with one unpacked integer per chunk.

    Raises:
        struct.error: if a chunk is shorter than ``size`` bytes.
    """
    # Kept deliberately unoptimized: the performance tests time this loop as
    # the "original" measurement.
    fmt = _struct_fmt_original(size)
    data = bytes(data)
    ints = []
    for offset in range(0, len(data), size):
        ints.append(struct.unpack(fmt, data[offset : offset + size])[0])
    return ints


def int_from_data_original(data, size):
    """Reference (pre-optimization) single-integer decoder for the benchmarks.

    Args:
        data: Bytes-like input; it is copied with ``bytes(data)`` first.
        size: Integer width in bytes, as accepted by ``_struct_fmt_original``.

    Returns:
        The first (only) value produced by ``struct.unpack``.

    Raises:
        struct.error: if ``data`` does not match the size the format requires.
    """
    fmt = _struct_fmt_original(size)
    return struct.unpack(fmt, bytes(data))[0]


def data_from_int_original(integer, size=4):
    """Reference (pre-optimization) encoder used as the benchmark baseline.

    Args:
        integer: Value to pack.
        size: Output width in bytes (default 4), as accepted by
            ``_struct_fmt_original``.

    Returns:
        The ``bytes`` object produced by ``struct.pack``.

    Raises:
        struct.error: if ``integer`` cannot be packed with the chosen format.
    """
    fmt = _struct_fmt_original(size)
    return struct.pack(fmt, integer)


def bytearray_from_ints_original(array_of_ints, size=4):
    """Reference (pre-optimization) bulk encoder used as the benchmark baseline.

    Args:
        array_of_ints: Iterable of integers to encode, in order.
        size: Width in bytes of each encoded integer (default 4).

    Returns:
        A ``bytearray`` holding every integer packed by
        ``data_from_int_original``, concatenated in input order.
    """
    # Kept as a per-item extend loop on purpose: this is the "original"
    # measurement in the array-creation performance test.
    ba = bytearray()
    for i in array_of_ints:
        ba.extend(data_from_int_original(i, size))
    return ba


# ----------------------------------------------------------------


@pytest.mark.performance
def test_ints_from_data_performance_comparison():
    """Time ints_from_data_original against ints_from_data_new on one buffer."""
    payload = bytes(range(256)) * 1024  # 256 KiB
    iterations = 100

    # Baseline implementation.
    started = time.perf_counter()
    for _ in range(iterations):
        ints_from_data_original(payload, 4)
    duration_original = time.perf_counter() - started

    # Current implementation, same input and iteration count.
    started = time.perf_counter()
    for _ in range(iterations):
        ints_from_data_new(payload, 4)
    duration_new = time.perf_counter() - started

    report = (
        f"\nPerformance for ints_from_data ({iterations} iterations on 256KiB chunk):",
        f" - Original (struct): {duration_original:.4f}s",
        f" - New (int.from_bytes): {duration_new:.4f}s",
    )
    print("\n".join(report))
    assert duration_new < duration_original


@pytest.mark.performance
def test_int_from_data_performance_comparison():
    """Time int_from_data_original against int_from_data_new on four bytes."""
    sample = b"\xde\xad\xbe\xef"
    iterations = 500000

    # Baseline implementation.
    started = time.perf_counter()
    for _ in range(iterations):
        int_from_data_original(sample, 4)
    duration_original = time.perf_counter() - started

    # Current implementation, same input and iteration count.
    started = time.perf_counter()
    for _ in range(iterations):
        int_from_data_new(sample, 4)
    duration_new = time.perf_counter() - started

    report = (
        f"\nPerformance for int_from_data ({iterations} iterations):",
        f" - Original (struct): {duration_original:.4f}s",
        f" - New (int.from_bytes): {duration_new:.4f}s",
    )
    print("\n".join(report))
    assert duration_new < duration_original


@pytest.mark.performance
def test_data_from_int_performance_comparison():
    """Time data_from_int_original against data_from_int_new on one value."""
    value = 0xDEADBEEF
    iterations = 500000

    # Baseline implementation.
    started = time.perf_counter()
    for _ in range(iterations):
        data_from_int_original(value, 4)
    duration_original = time.perf_counter() - started

    # Current implementation, same input and iteration count.
    started = time.perf_counter()
    for _ in range(iterations):
        data_from_int_new(value, 4)
    duration_new = time.perf_counter() - started

    report = (
        f"\nPerformance for data_from_int ({iterations} iterations):",
        f" - Original (struct): {duration_original:.4f}s",
        f" - New (int.to_bytes): {duration_new:.4f}s",
    )
    print("\n".join(report))
    assert duration_new < duration_original


@pytest.mark.performance
def test_array_creation_performance_comparison():
    """Time bytearray_from_ints_original against bytearray_view_from_ints_new."""
    numbers = list(range(1024 * 10))  # 10k integers
    iterations = 100

    # Baseline implementation.
    started = time.perf_counter()
    for _ in range(iterations):
        bytearray_from_ints_original(numbers, 4)
    duration_original = time.perf_counter() - started

    # Current implementation, same input and iteration count.
    started = time.perf_counter()
    for _ in range(iterations):
        bytearray_view_from_ints_new(numbers)
    duration_new = time.perf_counter() - started

    report = (
        f"\nPerformance for array creation ({iterations} iterations on 10k ints):",
        f" - Original (bytearray extend): {duration_original:.4f}s",
        f" - New (array.extend + memoryview): {duration_new:.4f}s",
    )
    print("\n".join(report))
    assert duration_new < duration_original
11 changes: 7 additions & 4 deletions utils/formatted_tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@
# DEALINGS IN THE SOFTWARE.
#


def formatted_tuple_from_data(fmtTuple, data, offset=0):
    """Build a ``fmtTuple`` from the slice of ``data`` that starts at ``offset``.

    Args:
        fmtTuple: Object providing ``_size()`` and ``_make(data)``.
        data: Sliceable source buffer.
        offset: Index of the first element to use (default 0).

    Returns:
        Whatever ``fmtTuple._make`` returns for the ``_size()``-long slice.
    """
    end = offset + fmtTuple._size()
    window = data[offset:end]
    return fmtTuple._make(window)


class FormattedTuple(object):
namedtuple = None
struct = None
Expand All @@ -34,10 +36,11 @@ def post_make(cls, instance):
return instance

@classmethod
def _make(cls, data):
size = cls._size()
# Wrap data in bytes() for python 2.6 compatibility
instance = cls.namedtuple._make(cls.struct.unpack_from(bytes(data)))
def _make(cls, data: object):
from .ints_to_bytes import _byte_view # local import to avoid cycle

buffer = _byte_view(data)
instance = cls.namedtuple._make(cls.struct.unpack_from(buffer))
return cls.post_make(instance)

@classmethod
Expand Down
Loading