Skip to content

Commit 8c846bb

Browse files
committed
wip
Signed-off-by: Harsh Rawat <harshrawat@microsoft.com>
1 parent a738b0c commit 8c846bb

37 files changed

Lines changed: 2555 additions & 86 deletions

cmd/containerd-shim-lcow-v2/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/Microsoft/hcsshim/internal/log"
1717
"github.com/Microsoft/hcsshim/internal/oc"
1818
"github.com/Microsoft/hcsshim/internal/shim"
19+
"github.com/Microsoft/hcsshim/osversion"
1920

2021
"github.com/containerd/errdefs"
2122
"github.com/sirupsen/logrus"
@@ -42,6 +43,12 @@ func main() {
4243
os.Exit(1)
4344
}
4445

46+
// This shim is supported on Windows Build 26100 and later.
47+
if osversion.Build() < osversion.V25H1Server {
48+
_, _ = fmt.Fprintf(os.Stderr,
49+
"%s: Windows version [%v] is not supported", service.ShimName, osversion.Build())
50+
}
51+
4552
// Start the shim manager event loop. The manager is responsible for
4653
// handling containerd start/stop lifecycle calls for the shim process.
4754
shim.Run(context.Background(), newShimManager(service.ShimName), func(c *shim.Config) {

cmd/containerd-shim-lcow-v2/service/service.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"sync"
88

99
"github.com/Microsoft/hcsshim/internal/builder/vm/lcow"
10+
"github.com/Microsoft/hcsshim/internal/controller/pod"
1011
"github.com/Microsoft/hcsshim/internal/controller/vm"
1112
"github.com/Microsoft/hcsshim/internal/log"
1213
"github.com/Microsoft/hcsshim/internal/shim"
@@ -47,6 +48,14 @@ type Service struct {
4748
// vmController is responsible for managing the lifecycle of the underlying utility VM and its associated resources.
4849
vmController vm.Controller
4950

51+
// podControllers maps podID -> PodController for each active pod.
52+
podControllers map[string]pod.Controller
53+
54+
// containerPodMapping maps containerID -> podID, allowing callers to look up
55+
// which pod a container belongs to and then retrieve the corresponding controller
56+
// from podControllers.
57+
containerPodMapping map[string]string
58+
5059
// shutdown manages graceful shutdown operations and allows registration of cleanup callbacks.
5160
shutdown shutdown.Service
5261
}
@@ -64,6 +73,8 @@ func NewService(ctx context.Context, eventsPublisher shim.Publisher, sd shutdown
6473

6574
go svc.forward(ctx, eventsPublisher)
6675

76+
// todo: kill and delete all running pods.
77+
6778
// Register a shutdown callback to close the events channel,
6879
// which signals the forward goroutine to exit.
6980
sd.RegisterCallback(func(context.Context) error {

cmd/containerd-shim-lcow-v2/service/service_shimdiag.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,14 @@ func (s *Service) DiagShare(ctx context.Context, request *shimdiag.ShareRequest)
7272

7373
// DiagStacks returns the stack traces of all goroutines in the shim.
7474
// This method is part of the instrumentation layer and business logic is included in diagStacksInternal.
75-
func (s *Service) DiagStacks(ctx context.Context, request *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) {
75+
func (s *Service) DiagStacks(ctx context.Context, _ *shimdiag.StacksRequest) (resp *shimdiag.StacksResponse, err error) {
	// Open a tracing span for this call; the deferred closure records the
	// final value of the named result err on the span before it is ended.
	ctx, span := oc.StartSpan(ctx, "DiagStacks")
	defer span.End()
	defer func() { oc.SetSpanStatus(span, err) }()

	// Tag the span with the sandbox this shim instance serves.
	sandboxAttr := trace.StringAttribute(logfields.SandboxID, s.sandboxID)
	span.AddAttributes(sandboxAttr)

	// The request carries no inputs that are used here, so only the context
	// is forwarded. The internal error is converted to its gRPC form before
	// being returned to the caller.
	stacks, internalErr := s.diagStacksInternal(ctx)
	return stacks, errgrpc.ToGRPC(internalErr)
}
8585

cmd/containerd-shim-lcow-v2/service/service_shimdiag_internal.go

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ package service
55
import (
66
"context"
77
"fmt"
8+
"runtime"
9+
"time"
810

11+
"github.com/Microsoft/hcsshim/internal/controller/vm"
912
"github.com/Microsoft/hcsshim/internal/shimdiag"
13+
1014
"github.com/containerd/errdefs"
1115
)
1216

@@ -22,14 +26,83 @@ func (s *Service) diagExecInHostInternal(ctx context.Context, request *shimdiag.
2226
return &shimdiag.ExecProcessResponse{ExitCode: int32(ec)}, nil
2327
}
2428

25-
func (s *Service) diagTasksInternal(_ context.Context, _ *shimdiag.TasksRequest) (*shimdiag.TasksResponse, error) {
26-
return nil, errdefs.ErrNotImplemented
29+
func (s *Service) diagTasksInternal(ctx context.Context, request *shimdiag.TasksRequest) (*shimdiag.TasksResponse, error) {
30+
if s.vmController.State() != vm.StateRunning {
31+
return nil, fmt.Errorf("cannot list tasks when vm is not running: %w", errdefs.ErrFailedPrecondition)
32+
}
33+
34+
// Originally this method was intended to be used in a single pod setup and therefore,
35+
// we do not specify a TaskID in the request. Since our shim can support multiple pods,
36+
// we will modify this functionality so that we will return all tasks running in the UVM,
37+
// regardless of which pod they belong to.
38+
39+
resp := &shimdiag.TasksResponse{}
40+
41+
// todo: think about concurrency handling here.
42+
// Do we want to lock for entire duration or not.
43+
// This is a diagnostic method and therefore, should not have
44+
// performance implications in prod.
45+
s.mu.Lock()
46+
defer s.mu.Unlock()
47+
48+
for _, podCtrl := range s.podControllers {
49+
containers, err := podCtrl.ListContainers()
50+
if err != nil {
51+
return nil, fmt.Errorf("failed to list containers: %w", err)
52+
}
53+
54+
for containerID, ctrCtrl := range containers {
55+
t := &shimdiag.Task{ID: containerID}
56+
57+
if request.Execs {
58+
processes, err := ctrCtrl.ListProcesses()
59+
if err != nil {
60+
return nil, fmt.Errorf("failed to list processes for container %s: %w", containerID, err)
61+
}
62+
63+
for _, proc := range processes {
64+
status := proc.Status(false)
65+
t.Execs = append(t.Execs, &shimdiag.Exec{
66+
ID: status.ExecID,
67+
State: status.Status.String(),
68+
})
69+
}
70+
}
71+
72+
resp.Tasks = append(resp.Tasks, t)
73+
}
74+
}
75+
76+
return resp, nil
2777
}
2878

2979
// diagShareInternal is a stub: it ignores both of its arguments and always
// returns a nil response together with errdefs.ErrNotImplemented.
func (s *Service) diagShareInternal(_ context.Context, _ *shimdiag.ShareRequest) (*shimdiag.ShareResponse, error) {
	return nil, errdefs.ErrNotImplemented
}
3282

33-
func (s *Service) diagStacksInternal(_ context.Context, _ *shimdiag.StacksRequest) (*shimdiag.StacksResponse, error) {
34-
return nil, errdefs.ErrNotImplemented
83+
func (s *Service) diagStacksInternal(ctx context.Context) (*shimdiag.StacksResponse, error) {
84+
if s.vmController.State() != vm.StateRunning {
85+
return nil, fmt.Errorf("cannot dump stacks when vm is not running: %w", errdefs.ErrFailedPrecondition)
86+
}
87+
88+
buf := make([]byte, 4096)
89+
for {
90+
buf = buf[:runtime.Stack(buf, true)]
91+
if len(buf) < cap(buf) {
92+
break
93+
}
94+
buf = make([]byte, 2*len(buf))
95+
}
96+
97+
timedCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
98+
defer cancel()
99+
100+
resp := &shimdiag.StacksResponse{Stacks: string(buf)}
101+
stacks, err := s.vmController.DumpStacks(timedCtx)
102+
if err != nil {
103+
return nil, fmt.Errorf("failed to dump stacks: %w", err)
104+
}
105+
106+
resp.GuestStacks = stacks
107+
return resp, nil
35108
}

cmd/containerd-shim-lcow-v2/service/service_task.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package service
44

55
import (
66
"context"
7+
"os"
78

89
"github.com/Microsoft/hcsshim/internal/logfields"
910
"github.com/Microsoft/hcsshim/internal/oc"
@@ -319,8 +320,13 @@ func (s *Service) Connect(ctx context.Context, request *task.ConnectRequest) (re
319320
trace.StringAttribute(logfields.SandboxID, s.sandboxID),
320321
trace.StringAttribute(logfields.ID, request.ID))
321322

322-
r, e := s.connectInternal(ctx, request)
323-
return r, errgrpc.ToGRPC(e)
323+
// We treat the shim/task as the same pid on the Windows host.
324+
pid := uint32(os.Getpid())
325+
326+
return &task.ConnectResponse{
327+
ShimPid: pid,
328+
TaskPid: pid,
329+
}, nil
324330
}
325331

326332
// Shutdown gracefully shuts down the Service.

0 commit comments

Comments
 (0)