hami-mock-device-plugin server log:
`kubectl logs -f hami-mock-device-plugin-daemonset-6w8jd -n kube-system
I0109 07:14:55.482871 1 config.go:116] Loading device configuration from file: /device-config.yaml
I0109 07:14:55.482943 1 config.go:59] Reading config file from path: /device-config.yaml
I0109 07:14:55.484277 1 config.go:69] Successfully read and parsed config file
I0109 07:14:55.484287 1 config.go:121] Loaded config: &{{{0xc000014e80 0xc000014e88 0xc000014e90 0xc00069c2c0} nvidia.com/gpu nvidia.com/gpumem nvidia.com/gpucores nvidia.com/gpumem-percentage nvidia.com/priority false 0 0 1 1 false [{[A30] [[{1g.6gb 6144 4}] [{2g.12gb 12288 2}] [{4g.24gb 24576 1}]]} {[A100-SXM4-40GB A100-40GB-PCIe A100-PCIE-40GB] [[{1g.5gb 5120 7}] [{1g.5gb 5120 1} {2g.10gb 10240 3}] [{3g.20gb 20480 2}] [{7g.40gb 40960 1}]]} {[A100-SXM4-80GB A100-80GB-PCIe A100-PCIE-80GB] [[{1g.10gb 10240 7}] [{1g.10gb 10240 1} {2g.20gb 20480 3}] [{3g.40gb 40960 2}] [{7g.79gb 80896 1}]]} {[H100-PCIE-80GB H100-SXM5-80GB] [[{1g.10gb 10240 7}] [{1g.10gb 10240 1} {2g.20gb 20480 3}] [{3g.40gb 40960 2}] [{7g.80gb 81920 1}]]} {[H100-PCIE-94GB H100-SXM5-94GB] [[{1g.12gb 12288 7}] [{1g.12gb 12288 1} {2g.24gb 24576 3}] [{3g.47gb 48128 2}] [{7g.94gb 96256 1}]]} {[H20 H100 on GH200] [[{1g.12gb 12288 7}] [{1g.12gb 12288 1} {2g.24gb 24576 3}] [{3g.48gb 49152 2}] [{7g.96gb 98304 1}]]} {[H200 NVL H200-SXM5] [[{1g.18gb 18432 7}] [{1g.18gb 18432 1} {2g.35gb 35840 3}] [{3g.71gb 72704 2}] [{7g.141gb 144384 1}]]} {[B200] [[{1g.23gb 23552 7}] [{1g.23gb 23552 1} {2g.45gb 46080 3}] [{3g.90gb 92160 2}] [{7g.180gb 184320 1}]]}] default } {metax-tech.com/gpu metax-tech.com/sgpu metax-tech.com/vmemory metax-tech.com/vcore false} {hygon.com/dcunum hygon.com/dcumem hygon.com/dcucores 1} {cambricon.com/vmlu cambricon.com/mlu.smlu.vmemory cambricon.com/mlu.smlu.vcore} {mthreads.com/vgpu mthreads.com/sgpu-memory mthreads.com/sgpu-core} [{MR-V100 MR-V100 iluvatar.ai/MR-V100-vgpu iluvatar.ai/MR-V100.vMem iluvatar.ai/MR-V100.vCore} {MR-V50 MR-V50 iluvatar.ai/MR-V50-vgpu iluvatar.ai/MR-V50.vMem iluvatar.ai/MR-V50.vCore} {BI-V150 BI-V150 iluvatar.ai/BI-V150-vgpu iluvatar.ai/BI-V150.vMem iluvatar.ai/BI-V150.vCore} {BI-V100 BI-V100 iluvatar.ai/BI-V100-vgpu iluvatar.ai/BI-V100.vMem iluvatar.ai/BI-V100.vCore}] {enflame.com/gcu enflame.com/vgcu enflame.com/vgcu-percentage} {kunlunxin.com/xpu 
kunlunxin.com/vxpu kunlunxin.com/vxpu-memory} {aws.amazon.com/neuron aws.amazon.com/neuroncore} {amd.com/gpu } [{Ascend910A 910A huawei.com/Ascend910A huawei.com/Ascend910A-memory 32768 32768 1 30 0 [{vir02 2184 2 0} {vir04 4369 4 0} {vir08 8738 8 0} {vir16 17476 16 0}]} {Ascend910B2 910B2 huawei.com/Ascend910B2 huawei.com/Ascend910B2-memory 65536 65536 1 24 6 [{vir03_1c_8g 8192 3 1} {vir06_1c_16g 16384 6 1} {vir12_3c_32g 32768 12 3}]} {Ascend910B3 910B3 huawei.com/Ascend910B3 huawei.com/Ascend910B3-memory 65536 65536 1 20 7 [{vir05_1c_16g 16384 5 1} {vir10_3c_32g 32768 10 3}]} {Ascend910B4-1 910B4-1 huawei.com/Ascend910B4-1 huawei.com/Ascend910B4-1-memory 65536 65536 1 20 7 [{vir05_1c_8g 16384 5 1} {vir10_3c_16g 32768 10 3}]} {Ascend910B4 910B4 huawei.com/Ascend910B4 huawei.com/Ascend910B4-memory 32768 32768 1 20 7 [{vir05_1c_8g 8192 5 1} {vir10_3c_16g 16384 10 3}]} {Ascend310P 310P3 huawei.com/Ascend310P huawei.com/Ascend310P-memory 21527 24576 1 8 7 [{vir01 3072 1 1} {vir02 6144 2 2} {vir04 12288 4 4}]}]}
I0109 07:14:55.484439 1 device.go:69] load ascend vnpu config Ascend910A: {Ascend910A 910A huawei.com/Ascend910A huawei.com/Ascend910A-memory 32768 32768 1 30 0 [{vir02 2184 2 0} {vir04 4369 4 0} {vir08 8738 8 0} {vir16 17476 16 0}]}
I0109 07:14:55.484454 1 device.go:69] load ascend vnpu config Ascend910B2: {Ascend910B2 910B2 huawei.com/Ascend910B2 huawei.com/Ascend910B2-memory 65536 65536 1 24 6 [{vir03_1c_8g 8192 3 1} {vir06_1c_16g 16384 6 1} {vir12_3c_32g 32768 12 3}]}
I0109 07:14:55.484465 1 device.go:69] load ascend vnpu config Ascend910B3: {Ascend910B3 910B3 huawei.com/Ascend910B3 huawei.com/Ascend910B3-memory 65536 65536 1 20 7 [{vir05_1c_16g 16384 5 1} {vir10_3c_32g 32768 10 3}]}
I0109 07:14:55.484474 1 device.go:69] load ascend vnpu config Ascend910B4-1: {Ascend910B4-1 910B4-1 huawei.com/Ascend910B4-1 huawei.com/Ascend910B4-1-memory 65536 65536 1 20 7 [{vir05_1c_8g 16384 5 1} {vir10_3c_16g 32768 10 3}]}
I0109 07:14:55.484483 1 device.go:69] load ascend vnpu config Ascend910B4: {Ascend910B4 910B4 huawei.com/Ascend910B4 huawei.com/Ascend910B4-memory 32768 32768 1 20 7 [{vir05_1c_8g 8192 5 1} {vir10_3c_16g 16384 10 3}]}
I0109 07:14:55.484495 1 device.go:69] load ascend vnpu config Ascend310P: {Ascend310P 310P3 huawei.com/Ascend310P huawei.com/Ascend310P-memory 21527 24576 1 8 7 [{vir01 3072 1 1} {vir02 6144 2 2} {vir04 12288 4 4}]}
I0109 07:14:55.484512 1 config.go:82] Ascend device Ascend910A initialized
I0109 07:14:55.484517 1 config.go:82] Ascend device Ascend910B2 initialized
I0109 07:14:55.484521 1 config.go:82] Ascend device Ascend910B3 initialized
I0109 07:14:55.484525 1 config.go:82] Ascend device Ascend910B4-1 initialized
I0109 07:14:55.484529 1 config.go:82] Ascend device Ascend910B4 initialized
I0109 07:14:55.484533 1 config.go:82] Ascend device Ascend310P initialized
I0109 07:14:55.484554 1 device.go:85] "initializing nvidia device" resourceName="nvidia.com/gpu" resourceMem="nvidia.com/gpumem" DefaultGPUNum=1
I0109 07:14:55.484567 1 device.go:105] Ascend910A run manager
I0109 07:14:55.484584 1 device.go:105] Ascend910B2 run manager
I0109 07:14:55.484590 1 device.go:105] Ascend910B3 run manager
I0109 07:14:55.484594 1 device.go:105] Ascend910B4-1 run manager
I0109 07:14:55.484599 1 device.go:105] Ascend910B4 run manager
I0109 07:14:55.484604 1 device.go:105] Ascend310P run manager
I0109 07:14:55.484609 1 device.go:105] DCU run manager
I0109 07:14:55.484614 1 device.go:105] NVIDIA run manager
I0109 07:14:55.484635 1 device.go:207] Running mocking dp: NVIDIA
I0109 07:14:55.484715 1 device.go:128] Running mocking dp: Ascend910B4-1
I0109 07:14:55.484720 1 device.go:128] Running mocking dp: Ascend910A
I0109 07:14:55.484797 1 device.go:128] Running mocking dp: Ascend310P
I0109 07:14:55.484814 1 device.go:128] Running mocking dp: Ascend910B4
I0109 07:14:55.484799 1 device.go:128] Running mocking dp: Ascend910B2
I0109 07:14:55.484818 1 client.go:55] BuildConfigFromFlags failed for file /root/.kube/config: stat /root/.kube/config: no such file or directory using inClusterConfig
I0109 07:14:55.484892 1 device.go:128] Running mocking dp: Ascend910B3
I0109 07:14:55.502109 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:14:55.502199 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:14:55.502425 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:14:55.502778 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:14:55.502862 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:14:55.502888 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:14:55.503377 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:14:55.503632 1 device.go:93] device DCU is unhealthy on this node
I0109 07:15:25.515515 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:15:25.515644 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:15:25.515738 1 device.go:93] device DCU is unhealthy on this node
I0109 07:15:25.515790 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:15:25.515834 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:15:25.515835 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:15:25.516312 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:15:25.517188 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:15:55.526871 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:15:55.526940 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:15:55.526991 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:15:55.527654 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:15:55.527902 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:15:55.528111 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:15:55.528163 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:15:55.528358 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:25.536174 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:16:25.536373 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:16:25.536512 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:16:25.537019 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:16:25.537205 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:16:25.537271 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:16:25.537289 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:16:25.537950 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:55.543261 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:16:55.544829 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:16:55.546500 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:16:55.546503 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:16:55.546653 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:16:55.546826 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:16:55.547706 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:55.548450 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:17:25.555684 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:17:25.555832 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:17:25.556076 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:17:25.556333 1 device.go:93] device DCU is unhealthy on this node
I0109 07:17:25.557787 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:17:25.559025 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:17:25.559142 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:17:25.559261 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:17:55.562788 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:17:55.563013 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:17:55.563426 1 device.go:93] device DCU is unhealthy on this node
I0109 07:17:55.563428 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:17:55.563567 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:17:55.564476 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:17:55.564668 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:17:55.565786 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:25.572506 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:18:25.573754 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:18:25.574380 1 device.go:93] device DCU is unhealthy on this node
I0109 07:18:25.574795 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:18:25.574803 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:18:25.574948 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:18:25.575353 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:18:25.575490 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:55.587586 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:18:55.587800 1 device.go:93] device DCU is unhealthy on this node
I0109 07:18:55.587858 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:18:55.588614 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:18:55.588820 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:18:55.588991 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:55.589191 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:18:55.590246 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:25.595062 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:19:25.605700 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:19:25.606521 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:19:25.606591 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:19:25.606702 1 device.go:93] device DCU is unhealthy on this node
I0109 07:19:25.606865 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:19:25.606951 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:19:25.609531 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:55.602596 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:19:55.609110 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:19:55.611217 1 device.go:93] device DCU is unhealthy on this node
I0109 07:19:55.611537 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:19:55.611814 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:19:55.611942 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:19:55.613685 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:55.614210 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:25.613749 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:20:25.614383 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:20:25.618242 1 device.go:93] device DCU is unhealthy on this node
I0109 07:20:25.619503 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:20:25.619858 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:20:25.620836 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:20:25.621443 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:25.621626 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:20:55.623957 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:20:55.623988 1 device.go:93] device DCU is unhealthy on this node
I0109 07:20:55.624088 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:20:55.625144 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:20:55.625559 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:20:55.625869 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:20:55.626185 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:55.629207 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:25.632190 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:21:25.632512 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:21:25.632957 1 device.go:93] device DCU is unhealthy on this node
I0109 07:21:25.633448 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:21:25.633514 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:21:25.634687 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:21:25.635878 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:21:25.636283 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:55.643320 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:21:55.643485 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:21:55.643752 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:21:55.643802 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:55.644103 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:21:55.644239 1 device.go:93] device DCU is unhealthy on this node
I0109 07:21:55.647287 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:21:55.647682 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:25.651526 1 device.go:93] device DCU is unhealthy on this node
I0109 07:22:25.652483 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:22:25.653150 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:22:25.653279 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:22:25.653405 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:22:25.653509 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:22:25.655178 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:22:25.655736 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:55.661896 1 device.go:93] device DCU is unhealthy on this node
I0109 07:22:55.662645 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:22:55.662773 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:22:55.663029 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:22:55.663043 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:22:55.663656 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:55.664464 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:22:55.665290 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:23:25.672365 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:23:25.672608 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:23:25.672622 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:23:25.672948 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:23:25.673539 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:23:25.673923 1 device.go:93] device DCU is unhealthy on this node
I0109 07:23:25.676239 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:23:25.676270 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:23:55.679537 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:23:55.679958 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:23:55.680094 1 device.go:93] device DCU is unhealthy on this node
I0109 07:23:55.680516 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:23:55.681414 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:23:55.681467 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:23:55.681902 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:23:55.682015 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:24:25.687090 1 device.go:93] device DCU is unhealthy on this node
I0109 07:24:25.687209 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:24:25.688181 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:24:25.688419 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:24:25.688479 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:24:25.688523 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:24:25.688697 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:24:25.689485 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:24:55.699557 1 device.go:93] device DCU is unhealthy on this node
I0109 07:24:55.699729 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:24:55.699796 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:24:55.699892 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:24:55.699898 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:24:55.700544 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:24:55.700545 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:24:55.701750 1 device.go:104] device Ascend910B3 is unhealthy on this node`
Node status:
`kubectl describe nodes node1 | grep Capacity -A 20`
Capacity:
cpu: 112
ephemeral-storage: 1919113944Ki
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 230712852Ki
pods: 500
Allocatable:
cpu: 111600m
ephemeral-storage: 1919113944Ki
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 223913174250
pods: 500
hami-mock-device-plugin server log:
`kubectl logs -f hami-mock-device-plugin-daemonset-6w8jd -n kube-system
I0109 07:14:55.482871 1 config.go:116] Loading device configuration from file: /device-config.yaml
I0109 07:14:55.482943 1 config.go:59] Reading config file from path: /device-config.yaml
I0109 07:14:55.484277 1 config.go:69] Successfully read and parsed config file
I0109 07:14:55.484287 1 config.go:121] Loaded config: &{{{0xc000014e80 0xc000014e88 0xc000014e90 0xc00069c2c0} nvidia.com/gpu nvidia.com/gpumem nvidia.com/gpucores nvidia.com/gpumem-percentage nvidia.com/priority false 0 0 1 1 false [{[A30] [[{1g.6gb 6144 4}] [{2g.12gb 12288 2}] [{4g.24gb 24576 1}]]} {[A100-SXM4-40GB A100-40GB-PCIe A100-PCIE-40GB] [[{1g.5gb 5120 7}] [{1g.5gb 5120 1} {2g.10gb 10240 3}] [{3g.20gb 20480 2}] [{7g.40gb 40960 1}]]} {[A100-SXM4-80GB A100-80GB-PCIe A100-PCIE-80GB] [[{1g.10gb 10240 7}] [{1g.10gb 10240 1} {2g.20gb 20480 3}] [{3g.40gb 40960 2}] [{7g.79gb 80896 1}]]} {[H100-PCIE-80GB H100-SXM5-80GB] [[{1g.10gb 10240 7}] [{1g.10gb 10240 1} {2g.20gb 20480 3}] [{3g.40gb 40960 2}] [{7g.80gb 81920 1}]]} {[H100-PCIE-94GB H100-SXM5-94GB] [[{1g.12gb 12288 7}] [{1g.12gb 12288 1} {2g.24gb 24576 3}] [{3g.47gb 48128 2}] [{7g.94gb 96256 1}]]} {[H20 H100 on GH200] [[{1g.12gb 12288 7}] [{1g.12gb 12288 1} {2g.24gb 24576 3}] [{3g.48gb 49152 2}] [{7g.96gb 98304 1}]]} {[H200 NVL H200-SXM5] [[{1g.18gb 18432 7}] [{1g.18gb 18432 1} {2g.35gb 35840 3}] [{3g.71gb 72704 2}] [{7g.141gb 144384 1}]]} {[B200] [[{1g.23gb 23552 7}] [{1g.23gb 23552 1} {2g.45gb 46080 3}] [{3g.90gb 92160 2}] [{7g.180gb 184320 1}]]}] default } {metax-tech.com/gpu metax-tech.com/sgpu metax-tech.com/vmemory metax-tech.com/vcore false} {hygon.com/dcunum hygon.com/dcumem hygon.com/dcucores 1} {cambricon.com/vmlu cambricon.com/mlu.smlu.vmemory cambricon.com/mlu.smlu.vcore} {mthreads.com/vgpu mthreads.com/sgpu-memory mthreads.com/sgpu-core} [{MR-V100 MR-V100 iluvatar.ai/MR-V100-vgpu iluvatar.ai/MR-V100.vMem iluvatar.ai/MR-V100.vCore} {MR-V50 MR-V50 iluvatar.ai/MR-V50-vgpu iluvatar.ai/MR-V50.vMem iluvatar.ai/MR-V50.vCore} {BI-V150 BI-V150 iluvatar.ai/BI-V150-vgpu iluvatar.ai/BI-V150.vMem iluvatar.ai/BI-V150.vCore} {BI-V100 BI-V100 iluvatar.ai/BI-V100-vgpu iluvatar.ai/BI-V100.vMem iluvatar.ai/BI-V100.vCore}] {enflame.com/gcu enflame.com/vgcu enflame.com/vgcu-percentage} {kunlunxin.com/xpu 
kunlunxin.com/vxpu kunlunxin.com/vxpu-memory} {aws.amazon.com/neuron aws.amazon.com/neuroncore} {amd.com/gpu } [{Ascend910A 910A huawei.com/Ascend910A huawei.com/Ascend910A-memory 32768 32768 1 30 0 [{vir02 2184 2 0} {vir04 4369 4 0} {vir08 8738 8 0} {vir16 17476 16 0}]} {Ascend910B2 910B2 huawei.com/Ascend910B2 huawei.com/Ascend910B2-memory 65536 65536 1 24 6 [{vir03_1c_8g 8192 3 1} {vir06_1c_16g 16384 6 1} {vir12_3c_32g 32768 12 3}]} {Ascend910B3 910B3 huawei.com/Ascend910B3 huawei.com/Ascend910B3-memory 65536 65536 1 20 7 [{vir05_1c_16g 16384 5 1} {vir10_3c_32g 32768 10 3}]} {Ascend910B4-1 910B4-1 huawei.com/Ascend910B4-1 huawei.com/Ascend910B4-1-memory 65536 65536 1 20 7 [{vir05_1c_8g 16384 5 1} {vir10_3c_16g 32768 10 3}]} {Ascend910B4 910B4 huawei.com/Ascend910B4 huawei.com/Ascend910B4-memory 32768 32768 1 20 7 [{vir05_1c_8g 8192 5 1} {vir10_3c_16g 16384 10 3}]} {Ascend310P 310P3 huawei.com/Ascend310P huawei.com/Ascend310P-memory 21527 24576 1 8 7 [{vir01 3072 1 1} {vir02 6144 2 2} {vir04 12288 4 4}]}]}
I0109 07:14:55.484439 1 device.go:69] load ascend vnpu config Ascend910A: {Ascend910A 910A huawei.com/Ascend910A huawei.com/Ascend910A-memory 32768 32768 1 30 0 [{vir02 2184 2 0} {vir04 4369 4 0} {vir08 8738 8 0} {vir16 17476 16 0}]}
I0109 07:14:55.484454 1 device.go:69] load ascend vnpu config Ascend910B2: {Ascend910B2 910B2 huawei.com/Ascend910B2 huawei.com/Ascend910B2-memory 65536 65536 1 24 6 [{vir03_1c_8g 8192 3 1} {vir06_1c_16g 16384 6 1} {vir12_3c_32g 32768 12 3}]}
I0109 07:14:55.484465 1 device.go:69] load ascend vnpu config Ascend910B3: {Ascend910B3 910B3 huawei.com/Ascend910B3 huawei.com/Ascend910B3-memory 65536 65536 1 20 7 [{vir05_1c_16g 16384 5 1} {vir10_3c_32g 32768 10 3}]}
I0109 07:14:55.484474 1 device.go:69] load ascend vnpu config Ascend910B4-1: {Ascend910B4-1 910B4-1 huawei.com/Ascend910B4-1 huawei.com/Ascend910B4-1-memory 65536 65536 1 20 7 [{vir05_1c_8g 16384 5 1} {vir10_3c_16g 32768 10 3}]}
I0109 07:14:55.484483 1 device.go:69] load ascend vnpu config Ascend910B4: {Ascend910B4 910B4 huawei.com/Ascend910B4 huawei.com/Ascend910B4-memory 32768 32768 1 20 7 [{vir05_1c_8g 8192 5 1} {vir10_3c_16g 16384 10 3}]}
I0109 07:14:55.484495 1 device.go:69] load ascend vnpu config Ascend310P: {Ascend310P 310P3 huawei.com/Ascend310P huawei.com/Ascend310P-memory 21527 24576 1 8 7 [{vir01 3072 1 1} {vir02 6144 2 2} {vir04 12288 4 4}]}
I0109 07:14:55.484512 1 config.go:82] Ascend device Ascend910A initialized
I0109 07:14:55.484517 1 config.go:82] Ascend device Ascend910B2 initialized
I0109 07:14:55.484521 1 config.go:82] Ascend device Ascend910B3 initialized
I0109 07:14:55.484525 1 config.go:82] Ascend device Ascend910B4-1 initialized
I0109 07:14:55.484529 1 config.go:82] Ascend device Ascend910B4 initialized
I0109 07:14:55.484533 1 config.go:82] Ascend device Ascend310P initialized
I0109 07:14:55.484554 1 device.go:85] "initializing nvidia device" resourceName="nvidia.com/gpu" resourceMem="nvidia.com/gpumem" DefaultGPUNum=1
I0109 07:14:55.484567 1 device.go:105] Ascend910A run manager
I0109 07:14:55.484584 1 device.go:105] Ascend910B2 run manager
I0109 07:14:55.484590 1 device.go:105] Ascend910B3 run manager
I0109 07:14:55.484594 1 device.go:105] Ascend910B4-1 run manager
I0109 07:14:55.484599 1 device.go:105] Ascend910B4 run manager
I0109 07:14:55.484604 1 device.go:105] Ascend310P run manager
I0109 07:14:55.484609 1 device.go:105] DCU run manager
I0109 07:14:55.484614 1 device.go:105] NVIDIA run manager
I0109 07:14:55.484635 1 device.go:207] Running mocking dp: NVIDIA
I0109 07:14:55.484715 1 device.go:128] Running mocking dp: Ascend910B4-1
I0109 07:14:55.484720 1 device.go:128] Running mocking dp: Ascend910A
I0109 07:14:55.484797 1 device.go:128] Running mocking dp: Ascend310P
I0109 07:14:55.484814 1 device.go:128] Running mocking dp: Ascend910B4
I0109 07:14:55.484799 1 device.go:128] Running mocking dp: Ascend910B2
I0109 07:14:55.484818 1 client.go:55] BuildConfigFromFlags failed for file /root/.kube/config: stat /root/.kube/config: no such file or directory using inClusterConfig
I0109 07:14:55.484892 1 device.go:128] Running mocking dp: Ascend910B3
I0109 07:14:55.502109 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:14:55.502199 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:14:55.502425 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:14:55.502778 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:14:55.502862 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:14:55.502888 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:14:55.503377 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:14:55.503632 1 device.go:93] device DCU is unhealthy on this node
I0109 07:15:25.515515 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:15:25.515644 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:15:25.515738 1 device.go:93] device DCU is unhealthy on this node
I0109 07:15:25.515790 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:15:25.515834 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:15:25.515835 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:15:25.516312 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:15:25.517188 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:15:55.526871 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:15:55.526940 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:15:55.526991 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:15:55.527654 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:15:55.527902 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:15:55.528111 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:15:55.528163 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:15:55.528358 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:25.536174 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:16:25.536373 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:16:25.536512 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:16:25.537019 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:16:25.537205 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:16:25.537271 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:16:25.537289 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:16:25.537950 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:55.543261 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:16:55.544829 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:16:55.546500 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:16:55.546503 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:16:55.546653 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:16:55.546826 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:16:55.547706 1 device.go:93] device DCU is unhealthy on this node
I0109 07:16:55.548450 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:17:25.555684 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:17:25.555832 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:17:25.556076 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:17:25.556333 1 device.go:93] device DCU is unhealthy on this node
I0109 07:17:25.557787 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:17:25.559025 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:17:25.559142 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:17:25.559261 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:17:55.562788 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:17:55.563013 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:17:55.563426 1 device.go:93] device DCU is unhealthy on this node
I0109 07:17:55.563428 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:17:55.563567 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:17:55.564476 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:17:55.564668 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:17:55.565786 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:25.572506 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:18:25.573754 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:18:25.574380 1 device.go:93] device DCU is unhealthy on this node
I0109 07:18:25.574795 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:18:25.574803 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:18:25.574948 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:18:25.575353 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:18:25.575490 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:55.587586 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:18:55.587800 1 device.go:93] device DCU is unhealthy on this node
I0109 07:18:55.587858 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:18:55.588614 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:18:55.588820 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:18:55.588991 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:18:55.589191 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:18:55.590246 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:25.595062 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:19:25.605700 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:19:25.606521 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:19:25.606591 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:19:25.606702 1 device.go:93] device DCU is unhealthy on this node
I0109 07:19:25.606865 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:19:25.606951 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:19:25.609531 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:55.602596 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:19:55.609110 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:19:55.611217 1 device.go:93] device DCU is unhealthy on this node
I0109 07:19:55.611537 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:19:55.611814 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:19:55.611942 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:19:55.613685 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:19:55.614210 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:25.613749 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:20:25.614383 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:20:25.618242 1 device.go:93] device DCU is unhealthy on this node
I0109 07:20:25.619503 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:20:25.619858 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:20:25.620836 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:20:25.621443 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:25.621626 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:20:55.623957 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:20:55.623988 1 device.go:93] device DCU is unhealthy on this node
I0109 07:20:55.624088 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:20:55.625144 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:20:55.625559 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:20:55.625869 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:20:55.626185 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:20:55.629207 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:25.632190 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:21:25.632512 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:21:25.632957 1 device.go:93] device DCU is unhealthy on this node
I0109 07:21:25.633448 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:21:25.633514 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:21:25.634687 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:21:25.635878 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:21:25.636283 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:55.643320 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:21:55.643485 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:21:55.643752 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:21:55.643802 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:21:55.644103 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:21:55.644239 1 device.go:93] device DCU is unhealthy on this node
I0109 07:21:55.647287 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:21:55.647682 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:25.651526 1 device.go:93] device DCU is unhealthy on this node
I0109 07:22:25.652483 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:22:25.653150 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:22:25.653279 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:22:25.653405 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:22:25.653509 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:22:25.655178 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:22:25.655736 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:55.661896 1 device.go:93] device DCU is unhealthy on this node
I0109 07:22:55.662645 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:22:55.662773 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:22:55.663029 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:22:55.663043 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:22:55.663656 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:22:55.664464 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:22:55.665290 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:23:25.672365 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:23:25.672608 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:23:25.672622 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:23:25.672948 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:23:25.673539 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:23:25.673923 1 device.go:93] device DCU is unhealthy on this node
I0109 07:23:25.676239 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:23:25.676270 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:23:55.679537 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:23:55.679958 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:23:55.680094 1 device.go:93] device DCU is unhealthy on this node
I0109 07:23:55.680516 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:23:55.681414 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:23:55.681467 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:23:55.681902 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:23:55.682015 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:24:25.687090 1 device.go:93] device DCU is unhealthy on this node
I0109 07:24:25.687209 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:24:25.688181 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:24:25.688419 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:24:25.688479 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:24:25.688523 1 device.go:104] device Ascend910B3 is unhealthy on this node
I0109 07:24:25.688697 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:24:25.689485 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:24:55.699557 1 device.go:93] device DCU is unhealthy on this node
I0109 07:24:55.699729 1 device.go:104] device Ascend910B4-1 is unhealthy on this node
I0109 07:24:55.699796 1 device.go:104] device Ascend910B2 is unhealthy on this node
I0109 07:24:55.699892 1 device.go:174] device NVIDIA is unhealthy on this node
I0109 07:24:55.699898 1 device.go:104] device Ascend910A is unhealthy on this node
I0109 07:24:55.700544 1 device.go:104] device Ascend310P is unhealthy on this node
I0109 07:24:55.700545 1 device.go:104] device Ascend910B4 is unhealthy on this node
I0109 07:24:55.701750 1 device.go:104] device Ascend910B3 is unhealthy on this node`
node status