Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions superbench/common/utils/device_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,17 @@ def get_device_power(self, idx):
"""
try:
power_measure = rocml.amdsmi_get_power_info(self._device_handlers[idx])
# amdsmi sets fields to 'N/A' when the hardware reports 0xFFFF (unsupported).
# On MI300X, average_socket_power is unsupported, so fall back to current_socket_power.
power = power_measure.get('average_socket_power')
if not isinstance(power, (int, float)):
power = power_measure.get('current_socket_power')
if not isinstance(power, (int, float)):
return None
Comment thread
polarG marked this conversation as resolved.
Outdated
return int(power)
Comment thread
polarG marked this conversation as resolved.
Outdated
Comment thread
polarG marked this conversation as resolved.
Outdated
except Exception as err:
logger.warning('Get device power failed: {}'.format(str(err)))
return None
return int(power_measure['average_socket_power'])

def get_device_power_limit(self, idx):
"""Get the power management limit of device, unit: watt.
Expand All @@ -405,10 +412,16 @@ def get_device_power_limit(self, idx):
"""
try:
power_measure = rocml.amdsmi_get_power_info(self._device_handlers[idx])
power_limit = power_measure.get('power_limit')
if not isinstance(power_limit, (int, float)):
return None
# amdsmi may report power_limit in microwatts (e.g. 750000000 for 750W); values above 100000 are assumed to be microwatts and converted to watts.
if power_limit > 100000:
power_limit = power_limit // 1000000
Comment thread
polarG marked this conversation as resolved.
Outdated
return int(power_limit)
Comment thread
polarG marked this conversation as resolved.
Outdated
except Exception as err:
logger.warning('Get device power limit failed: {}'.format(str(err)))
return None
return int(power_measure['power_limit'])

def get_device_memory(self, idx):
"""Get the memory information of device, unit: byte.
Expand Down
Loading