forked from OpenRLHF/OpenRLHF
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
48 lines (38 loc) · 2.53 KB
/
utils.py
File metadata and controls
48 lines (38 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
# Workaround for https://github.com/ray-project/ray/issues/51117
# This function returns the bundle indices of a placement group, ordered so
# that bundles placed on the same node are grouped together.
def get_bundle_indices(placement_group, index, length):
    """Return one chunk of a placement group's bundles, grouped by node.

    Looks up the placement group via ``ray.util.placement_group_table`` and
    reads its ``"bundles_to_node_id"`` mapping. Bundles that share a node id
    are made adjacent (nodes keep the order in which they are first seen in
    that mapping), and the ``index``-th chunk of ``length`` entries is
    returned.

    Args:
        placement_group: Handle passed to ``ray.util.placement_group_table``.
        index: Zero-based number of the chunk to return.
        length: Number of bundle entries per chunk.

    Returns:
        A list with the keys of ``"bundles_to_node_id"`` that fall into the
        requested chunk (presumably integer bundle indices -- confirm against
        the Ray version in use). The list is shorter than ``length`` when the
        chunk runs past the end.
    """
    import ray

    table = ray.util.placement_group_table(placement_group)

    # Bucket bundles per node; dict insertion order keeps nodes in the order
    # they first appear in the table.
    bundles_by_node = {}
    for bundle_idx, node in table["bundles_to_node_id"].items():
        if node not in bundles_by_node:
            bundles_by_node[node] = []
        bundles_by_node[node].append(bundle_idx)

    # Flatten node by node so bundles on the same node end up contiguous.
    grouped = [idx for bundles in bundles_by_node.values() for idx in bundles]

    first = index * length
    last = (index + 1) * length
    return grouped[first:last]
def ray_noset_visible_devices(env_vars=os.environ):
    """Report whether any ``RAY_EXPERIMENTAL_NOSET_*`` device flag is set.

    Args:
        env_vars: Mapping to look the flags up in; defaults to the live
            ``os.environ``. It is only read, never modified.

    Returns:
        ``True`` if at least one of the flags listed below maps to a truthy
        value in ``env_vars`` (for ``os.environ`` that is any non-empty
        string, including ``"0"``), otherwise ``False``.
    """
    # Refer to
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/nvidia_gpu.py#L95-L96
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/amd_gpu.py#L102-L103
    # https://github.com/ray-project/ray/blob/3b9e729f6a669ffd85190f901f5e262af79771b0/python/ray/_private/accelerators/amd_gpu.py#L114-L115
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/npu.py#L94-L95
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/hpu.py#L116-L117
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/neuron.py#L108-L109
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/tpu.py#L171-L172
    # https://github.com/ray-project/ray/blob/161849364a784442cc659fb9780f1a6adee85fce/python/ray/_private/accelerators/intel_gpu.py#L97-L98
    noset_flags = (
        "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_ASCEND_RT_VISIBLE_DEVICES",
        "RAY_EXPERIMENTAL_NOSET_HABANA_VISIBLE_MODULES",
        "RAY_EXPERIMENTAL_NOSET_NEURON_RT_VISIBLE_CORES",
        "RAY_EXPERIMENTAL_NOSET_TPU_VISIBLE_CHIPS",
        "RAY_EXPERIMENTAL_NOSET_ONEAPI_DEVICE_SELECTOR",
    )
    # Stop at the first flag that carries a truthy value.
    for flag in noset_flags:
        if env_vars.get(flag):
            return True
    return False
def get_physical_gpu_id():
    """Return the UUID of the current CUDA device as a string.

    Asks ``torch.cuda`` for the current device, fetches that device's
    properties, and returns ``str()`` of their ``uuid`` attribute.
    """
    import torch

    current = torch.cuda.current_device()
    return str(torch.cuda.get_device_properties(current).uuid)