Skip to content

Commit a55f001

Browse files
danielpfeifer02 and gvisor-bot
authored and committed
Add runsc kill --pgid
Follow-up to PR #12739 that adds `runsc kill --pgid xxx` support. ## Changes ## Adds a `--pgid` flag to `runsc kill` that sends a signal to all processes in a given process group (identified by its PGID in the root PID namespace). This complements the existing `--pid` and `--all` flags. The signal is delivered through the existing `ContMgrSignal` RPC via a new `DeliverToProcessGroup` delivery mode, following the same `container -> sandbox -> loader` chain as the other signal modes. ## Testing ## `TestSignalProcessGroup` creates a 3-process container (`init -> child -> grandchild`) where the child calls `setpgid` to form a new process group shared with the grandchild. The test sends `SIGKILL` to that PGID and verifies that: - Both the child and the grandchild are killed - Init (PGID 1) survives A new `task-tree-pgid` test app subcommand supports this by spawning a deterministic process tree with a distinct process group. FUTURE_COPYBARA_INTEGRATE_REVIEW=#12905 from danielpfeifer02:dpfeifer/runsc-kill-pgid 7afe641 PiperOrigin-RevId: 897368159
1 parent 7a8f93e commit a55f001

7 files changed

Lines changed: 234 additions & 4 deletions

File tree

runsc/boot/controller.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,10 @@ const (
918918
// process. If PID is 0, then the signal is delivered to the foreground
919919
// process group for the TTY for the init process.
920920
DeliverToForegroundProcessGroup
921+
922+
// DeliverToProcessGroup delivers the signal to all processes in the
923+
// process group identified by a PGID.
924+
DeliverToProcessGroup
921925
)
922926

923927
func (s SignalDeliveryMode) String() string {
@@ -928,6 +932,8 @@ func (s SignalDeliveryMode) String() string {
928932
return "All"
929933
case DeliverToForegroundProcessGroup:
930934
return "Foreground Process Group"
935+
case DeliverToProcessGroup:
936+
return "Process Group"
931937
}
932938
return fmt.Sprintf("unknown signal delivery mode: %d", s)
933939
}

runsc/boot/loader.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1826,6 +1826,15 @@ func (l *Loader) signal(cid string, pid, signo int32, mode SignalDeliveryMode) e
18261826
}
18271827
return nil
18281828

1829+
case DeliverToProcessGroup:
1830+
if pid == 0 {
1831+
return fmt.Errorf("PGID must be set when signaling a process group")
1832+
}
1833+
if err := l.signalProcessGroup(cid, kernel.ProcessGroupID(pid), signo); err != nil {
1834+
return fmt.Errorf("signaling process group %d: %w", pid, err)
1835+
}
1836+
return nil
1837+
18291838
default:
18301839
panic(fmt.Sprintf("unknown signal delivery mode %v", mode))
18311840
}
@@ -1905,6 +1914,28 @@ func (l *Loader) signalAllProcesses(cid string, signo int32) error {
19051914
return l.k.SendContainerSignal(cid, &linux.SignalInfo{Signo: signo})
19061915
}
19071916

1917+
// signalProcessGroup sends the signal to all processes in the process group
1918+
// identified by pgid. pgid is relative to the root PID namespace. It verifies
1919+
// that the process group exists in the container with the given ID.
1920+
func (l *Loader) signalProcessGroup(cid string, pgid kernel.ProcessGroupID, signo int32) error {
1921+
pg := l.k.RootPIDNamespace().ProcessGroupWithID(pgid)
1922+
if pg == nil {
1923+
return fmt.Errorf("no such process group with PGID %d", pgid)
1924+
}
1925+
// Verify that the process group exists in correct container.
1926+
found := false
1927+
for _, tg := range l.k.TaskSet().Root.ThreadGroups() {
1928+
if tg.ProcessGroup() == pg && tg.Leader().ContainerID() == cid {
1929+
found = true
1930+
break
1931+
}
1932+
}
1933+
if !found {
1934+
return fmt.Errorf("process group %d does not belong to container %q", pgid, cid)
1935+
}
1936+
return l.k.SendExternalSignalProcessGroup(pg, &linux.SignalInfo{Signo: signo})
1937+
}
1938+
19081939
// threadGroupFromID is similar to tryThreadGroupFromIDLocked except that it
19091940
// acquires mutex before calling it and fails in case container hasn't started
19101941
// yet.

runsc/cmd/kill.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ import (
3232
// Kill implements subcommands.Command for the "kill" command.
3333
type Kill struct {
3434
containerLoader
35-
all bool
36-
pid int
35+
all bool
36+
pid int
37+
pgid int
3738
}
3839

3940
// Name implements subcommands.Command.Name.
@@ -55,6 +56,7 @@ func (*Kill) Usage() string {
5556
func (k *Kill) SetFlags(f *flag.FlagSet) {
5657
f.BoolVar(&k.all, "all", false, "send the specified signal to all processes inside the container")
5758
f.IntVar(&k.pid, "pid", 0, "send the specified signal to a specific process. pid is relative to the root PID namespace")
59+
f.IntVar(&k.pgid, "pgid", 0, "send the specified signal to all processes in the given process group. pgid is relative to the root PID namespace")
5860
}
5961

6062
// FetchSpec implements util.SubCommand.FetchSpec.
@@ -75,8 +77,19 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomma
7577

7678
conf := args[0].(*config.Config)
7779

78-
if k.pid != 0 && k.all {
79-
util.Fatalf("it is invalid to specify both --all and --pid")
80+
// Validate that at most one targeting mode is used.
81+
modes := 0
82+
if k.all {
83+
modes++
84+
}
85+
if k.pid != 0 {
86+
modes++
87+
}
88+
if k.pgid != 0 {
89+
modes++
90+
}
91+
if modes > 1 {
92+
util.Fatalf("it is invalid to specify more than one of --all, --pid, and --pgid")
8093
}
8194

8295
c, err := k.loadContainer(conf, f, container.LoadOpts{})
@@ -101,6 +114,10 @@ func (k *Kill) Execute(_ context.Context, f *flag.FlagSet, args ...any) subcomma
101114
if err := c.SignalProcess(sig, int32(k.pid)); err != nil {
102115
util.Fatalf("failed to signal pid %d: %v", k.pid, err)
103116
}
117+
} else if k.pgid != 0 {
118+
if err := c.SignalProcessGroup(sig, int32(k.pgid)); err != nil {
119+
util.Fatalf("failed to signal process group %d: %v", k.pgid, err)
120+
}
104121
} else {
105122
if err := c.SignalContainer(sig, k.all); err != nil {
106123
util.Fatalf("%v", err)

runsc/container/container.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,19 @@ func (c *Container) SignalProcess(sig unix.Signal, pid int32) error {
780780
return c.Sandbox.SignalProcess(c.ID, int32(pid), sig, false)
781781
}
782782

783+
// SignalProcessGroup sends sig to all processes in the given process group
784+
// inside the container.
785+
func (c *Container) SignalProcessGroup(sig unix.Signal, pgid int32) error {
786+
log.Debugf("Signal process group %d in container, cid: %s, signal: %v (%d)", pgid, c.ID, sig, sig)
787+
if err := c.requireStatus("signal a process group inside", Running); err != nil {
788+
return err
789+
}
790+
if !c.IsSandboxRunning() {
791+
return fmt.Errorf("sandbox is not running")
792+
}
793+
return c.Sandbox.SignalProcessGroup(c.ID, pgid, sig)
794+
}
795+
783796
// ForwardSignals forwards all signals received by the current process to the
784797
// container process inside the sandbox. It returns a function that will stop
785798
// forwarding signals.

runsc/container/container_test.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,93 @@ func TestKillPid(t *testing.T) {
10231023
}
10241024
}
10251025

1026+
// TestSignalProcessGroup verifies that SignalProcessGroup kills all
1027+
// processes in the targeted process group while leaving other groups
1028+
// running.
1029+
func TestSignalProcessGroup(t *testing.T) {
1030+
for name, conf := range configs(t, false /* noOverlay */) {
1031+
t.Run(name, func(t *testing.T) {
1032+
app, err := testutil.FindFile("test/cmd/test_app/test_app")
1033+
if err != nil {
1034+
t.Fatal("error finding test_app:", err)
1035+
}
1036+
1037+
spec := testutil.NewSpecWithArgs(app, "task-tree-pgid")
1038+
_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
1039+
if err != nil {
1040+
t.Fatalf("error setting up container: %v", err)
1041+
}
1042+
defer cleanup()
1043+
1044+
args := Args{
1045+
ID: testutil.RandomContainerID(),
1046+
Spec: spec,
1047+
BundleDir: bundleDir,
1048+
}
1049+
cont, err := New(conf, args)
1050+
if err != nil {
1051+
t.Fatalf("error creating container: %v", err)
1052+
}
1053+
defer cont.Destroy()
1054+
if err := cont.Start(conf); err != nil {
1055+
t.Fatalf("error starting container: %v", err)
1056+
}
1057+
1058+
// Wait for all 3 processes: init, child, grandchild.
1059+
if err := waitForProcessCount(cont, 3); err != nil {
1060+
t.Fatalf("timed out waiting for processes: %v", err)
1061+
}
1062+
1063+
// Collect PGIDs.
1064+
procs, err := cont.Processes()
1065+
if err != nil {
1066+
t.Fatalf("failed to get process list: %v", err)
1067+
}
1068+
t.Logf("before signal: %s", procListToString(procs))
1069+
1070+
// Init (PID 1) is in PGID 1.
1071+
// Child + grandchild should share a PGID != 1.
1072+
pgidA := int32(1)
1073+
pgidBCount := make(map[int32]int)
1074+
for _, p := range procs {
1075+
if int32(p.PGID) != pgidA {
1076+
pgidBCount[int32(p.PGID)]++
1077+
}
1078+
}
1079+
1080+
// Find the PGID shared by child+grandchild.
1081+
var pgidB int32
1082+
for pgid, n := range pgidBCount {
1083+
if n == 2 {
1084+
pgidB = pgid
1085+
}
1086+
}
1087+
if pgidB == 0 {
1088+
t.Fatalf("expected child and grandchild to share a PGID distinct from init (%d); got: %v", pgidA, pgidBCount)
1089+
}
1090+
t.Logf("PGID_init=%d, PGID_target=%d (%d processes)", pgidA, pgidB, pgidBCount[pgidB])
1091+
1092+
// Signal the target PGID (both child and grandchild should die, init survives).
1093+
if err := cont.SignalProcessGroup(unix.SIGKILL, pgidB); err != nil {
1094+
t.Fatalf("SignalProcessGroup(%d): %v", pgidB, err)
1095+
}
1096+
1097+
if err := waitForProcessCount(cont, 1); err != nil {
1098+
procs, procsErr := cont.Processes()
1099+
t.Fatalf("expected only init to survive: %v; processes: %s / %v", err, procListToString(procs), procsErr)
1100+
}
1101+
1102+
procs, err = cont.Processes()
1103+
if err != nil {
1104+
t.Fatalf("failed to get process list: %v", err)
1105+
}
1106+
if len(procs) != 1 || procs[0].PID != 1 {
1107+
t.Fatalf("expected only PID 1 to survive, got: %s", procListToString(procs))
1108+
}
1109+
})
1110+
}
1111+
}
1112+
10261113
// testCheckpointRestore creates a container that continuously writes successive
10271114
// integers to a file. To test checkpoint and restore functionality, the
10281115
// container is checkpointed and the last number printed to the file is

runsc/sandbox/sandbox.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,23 @@ func (s *Sandbox) SignalProcess(cid string, pid int32, sig unix.Signal, fgProces
15391539
return nil
15401540
}
15411541

1542+
// SignalProcessGroup sends the signal to all processes in the process group
1543+
// identified by pgid. pgid is relative to the root PID namespace.
1544+
func (s *Sandbox) SignalProcessGroup(cid string, pgid int32, sig unix.Signal) error {
1545+
log.Debugf("Signal sandbox %q process group %d", s.ID, pgid)
1546+
1547+
args := boot.SignalArgs{
1548+
CID: cid,
1549+
Signo: int32(sig),
1550+
PID: pgid,
1551+
Mode: boot.DeliverToProcessGroup,
1552+
}
1553+
if err := s.call(boot.ContMgrSignal, &args, nil); err != nil {
1554+
return fmt.Errorf("signaling container %q PGID %d: %v", cid, pgid, err)
1555+
}
1556+
return nil
1557+
}
1558+
15421559
// CheckpointOpts contains the options for checkpointing a sandbox.
15431560
type CheckpointOpts struct {
15441561
Compression statefile.CompressionLevel

test/cmd/test_app/main.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ func main() {
5555
subcommands.Register(new(reaper), "")
5656
subcommands.Register(new(syscall), "")
5757
subcommands.Register(new(taskTree), "")
58+
subcommands.Register(new(taskTreePGID), "")
5859
subcommands.Register(new(uds), "")
5960
subcommands.Register(new(zombieTest), "")
6061
registerSubcommandsExtra()
@@ -372,6 +373,64 @@ func (c *taskTree) Execute(ctx context.Context, f *flag.FlagSet, args ...any) su
372373
return subcommands.ExitSuccess
373374
}
374375

376+
// taskTreePGID implements subcommands.Command for the "task-tree-pgid"
// command, which is used to build a small process tree: init (level 0), a
// child started in a new process group (level 1) and a grandchild (level 2).
type taskTreePGID struct {
377+
// level selects which role in the tree this invocation plays. It is set
// from the --level flag and defaults to 0.
level int
378+
}
379+
380+
// Name implements subcommands.Command.Name.
381+
func (*taskTreePGID) Name() string {
382+
return "task-tree-pgid"
383+
}
384+
385+
// Synopsis implements subcommands.Command.Synopsis.
386+
func (*taskTreePGID) Synopsis() string {
387+
return "creates a child+grandchild in a new process group"
388+
}
389+
390+
// Usage implements subcommands.Command.Usage.
391+
func (*taskTreePGID) Usage() string {
392+
return "task-tree-pgid --level=N\n"
393+
}
394+
395+
// SetFlags implements subcommands.Command.SetFlags. It registers the --level
// flag, which is stored in c.level.
396+
func (c *taskTreePGID) SetFlags(f *flag.FlagSet) {
397+
f.IntVar(&c.level, "level", 0, "0=init, 1=child (new pgid), 2=grandchild")
398+
}
399+
400+
// Execute implements subcommands.Command.Execute.
401+
func (c *taskTreePGID) Execute(ctx context.Context, f *flag.FlagSet, args ...any) subcommands.ExitStatus {
402+
switch c.level {
403+
case 0:
404+
stop := testutil.StartReaper()
405+
defer stop()
406+
cmd := exec.Command("/proc/self/exe", "task-tree-pgid", "--level=1")
407+
cmd.Stdout = os.Stdout
408+
cmd.Stderr = os.Stderr
409+
cmd.SysProcAttr = &sys.SysProcAttr{Setpgid: true}
410+
if err := cmd.Start(); err != nil {
411+
log.Fatalf("level 0: failed to start child: %v", err)
412+
}
413+
for {
414+
time.Sleep(time.Hour)
415+
}
416+
417+
case 1:
418+
cmd := exec.Command("/proc/self/exe", "task-tree-pgid", "--level=2")
419+
cmd.Stdout = os.Stdout
420+
cmd.Stderr = os.Stderr
421+
if err := cmd.Start(); err != nil {
422+
log.Fatalf("level 1: failed to start grandchild: %v", err)
423+
}
424+
cmd.Wait()
425+
426+
case 2:
427+
for {
428+
time.Sleep(time.Hour)
429+
}
430+
}
431+
return subcommands.ExitSuccess
432+
}
433+
375434
type gvisorDetect struct {
376435
exitCodeOnGVisor int
377436
exitCodeOnNotGVisor int

0 commit comments

Comments
 (0)