Skip to content

Commit 09ab29d

Browse files
committed
refactor: translate Chinese into English, and adjust the idle vNPU cleanup code
Signed-off-by: libin18 <libin18@kingsoft.com>
1 parent 243af02 commit 09ab29d

3 files changed

Lines changed: 44 additions & 42 deletions

File tree

cmd/main.go

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"fmt"
2222
"os"
2323
"syscall"
24-
"time"
2524

2625
"github.com/Project-HAMi/HAMi/pkg/util/client"
2726
"github.com/Project-HAMi/ascend-device-plugin/internal"
@@ -74,33 +73,16 @@ restart:
7473
}
7574
}
7675
restarting = true
76+
if err := ps.CleanupIdleVNPUs(); err != nil {
77+
klog.Errorf("Failed to cleanup idle vNPUs: %v", err)
78+
}
7779
klog.Info("Starting Plugins.")
7880
err = ps.Start()
7981
if err != nil {
8082
klog.Errorf("Failed to start plugin server: %v", err)
8183
return err
8284
}
8385

84-
if err := ps.CleanupIdleVNPUs(); err != nil {
85-
klog.Errorf("Failed to cleanup idle vNPUs: %v", err)
86-
}
87-
cleanupTicker := time.NewTicker(time.Duration(*checkIdleVNPUInterval) * time.Second)
88-
defer cleanupTicker.Stop()
89-
go func() {
90-
for {
91-
select {
92-
case <-cleanupTicker.C:
93-
klog.Info("Running scheduled idle vNPU cleanup")
94-
if err := ps.CleanupIdleVNPUs(); err != nil {
95-
klog.Errorf("Failed to cleanup idle vNPUs: %v", err)
96-
}
97-
case <-ps.StopCh():
98-
klog.Info("Stopping cleanup goroutine")
99-
return
100-
}
101-
}
102-
}()
103-
10486
for {
10587
select {
10688
//case <-restartTimeout:
@@ -154,7 +136,7 @@ func main() {
154136
if err != nil {
155137
klog.Fatalf("load config failed, error is %v", err)
156138
}
157-
server, err := server.NewPluginServer(mgr, *nodeName)
139+
server, err := server.NewPluginServer(mgr, *nodeName, *checkIdleVNPUInterval)
158140
if err != nil {
159141
klog.Fatalf("init PluginServer failed, error is %v", err)
160142
}

internal/manager/manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ func (am *AscendManager) CleanupIdleVNPUs() error {
199199
klog.Warningf("failed to get card/device ID for logic ID %d: %v", logicID, err)
200200
continue
201201
}
202-
// 获取该设备上的所有 vNPU 信息
202+
// Obtain all vNPU information on this device
203203
vDevInfos, err := am.mgr.GetVirtualDeviceInfo(logicID)
204204
if err != nil {
205205
klog.Infof("no vNPU found on device %d or query failed: %v", logicID, err)

internal/server/server.go

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,28 +52,30 @@ var (
5252
)
5353

5454
type PluginServer struct {
55-
nodeName string
56-
registerAnno string
57-
handshakeAnno string
58-
allocAnno string
59-
grpcServer *grpc.Server
60-
mgr *manager.AscendManager
61-
socket string
62-
stopCh chan interface{}
63-
healthCh chan int32
55+
nodeName string
56+
registerAnno string
57+
handshakeAnno string
58+
allocAnno string
59+
grpcServer *grpc.Server
60+
mgr *manager.AscendManager
61+
socket string
62+
stopCh chan interface{}
63+
healthCh chan int32
64+
checkIdleVNPUInterval int
6465
}
6566

66-
func NewPluginServer(mgr *manager.AscendManager, nodeName string) (*PluginServer, error) {
67+
func NewPluginServer(mgr *manager.AscendManager, nodeName string, checkIdleVNPUInterval int) (*PluginServer, error) {
6768
return &PluginServer{
68-
nodeName: nodeName,
69-
registerAnno: fmt.Sprintf("hami.io/node-register-%s", mgr.CommonWord()),
70-
handshakeAnno: fmt.Sprintf("hami.io/node-handshake-%s", mgr.CommonWord()),
71-
allocAnno: fmt.Sprintf("huawei.com/%s", mgr.CommonWord()),
72-
grpcServer: grpc.NewServer(),
73-
mgr: mgr,
74-
socket: path.Join(v1beta1.DevicePluginPath, fmt.Sprintf("%s.sock", mgr.CommonWord())),
75-
stopCh: make(chan interface{}),
76-
healthCh: make(chan int32),
69+
nodeName: nodeName,
70+
registerAnno: fmt.Sprintf("hami.io/node-register-%s", mgr.CommonWord()),
71+
handshakeAnno: fmt.Sprintf("hami.io/node-handshake-%s", mgr.CommonWord()),
72+
allocAnno: fmt.Sprintf("huawei.com/%s", mgr.CommonWord()),
73+
grpcServer: grpc.NewServer(),
74+
mgr: mgr,
75+
socket: path.Join(v1beta1.DevicePluginPath, fmt.Sprintf("%s.sock", mgr.CommonWord())),
76+
stopCh: make(chan interface{}),
77+
healthCh: make(chan int32),
78+
checkIdleVNPUInterval: checkIdleVNPUInterval,
7779
}, nil
7880
}
7981

@@ -91,10 +93,28 @@ func (ps *PluginServer) Start() error {
9193
if err != nil {
9294
return err
9395
}
96+
go ps.startPeriodicCheckIdleVNPUs()
9497
go ps.watchAndRegister()
9598
return nil
9699
}
97100

101+
func (ps *PluginServer) startPeriodicCheckIdleVNPUs() {
102+
ticker := time.NewTicker(time.Duration(ps.checkIdleVNPUInterval) * time.Second)
103+
defer ticker.Stop()
104+
for {
105+
select {
106+
case <-ticker.C:
107+
klog.Info("Running scheduled idle vNPU cleanup")
108+
if err := ps.CleanupIdleVNPUs(); err != nil {
109+
klog.Errorf("Failed to cleanup idle vNPUs: %v", err)
110+
}
111+
case <-ps.stopCh:
112+
klog.Info("Stopping cleanup goroutine")
113+
return
114+
}
115+
}
116+
}
117+
98118
func (ps *PluginServer) Stop() error {
99119
close(ps.stopCh)
100120
ps.grpcServer.Stop()

0 commit comments

Comments
 (0)