Skip to content

Commit 161d3ff

Browse files
christos68krogercollflorianlfabled
authored
Conditionalize sched_process_free hook (#737) (#738)
Co-authored-by: Roger Coll <roger.coll@elastic.co> Co-authored-by: Florian Lehner <florianl@users.noreply.github.com> Co-authored-by: Timo Teräs <timo.teras@iki.fi>
1 parent a76b1c6 commit 161d3ff

7 files changed

Lines changed: 76 additions & 17 deletions

File tree

.github/workflows/unit-test-on-pull-request.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,16 @@ jobs:
179179
- { target_arch: amd64, kernel: 6.8.10 }
180180
- { target_arch: amd64, kernel: 6.9.1 }
181181
- { target_arch: amd64, kernel: 6.12.16 }
182+
- { target_arch: amd64, kernel: 6.16 }
182183

183184
# ARM64 (NOTE: older ARM64 kernels are not available in Cilium repos)
184185
- { target_arch: arm64, kernel: 6.6.31 }
185186
- { target_arch: arm64, kernel: 6.8.4 }
186187
- { target_arch: arm64, kernel: 6.9.1 }
187188
- { target_arch: arm64, kernel: 6.12.16 }
189+
# qemu has a bug that prevents booting the following kernel on ARM,
190+
# fixed in qemu 9.2
191+
#- { target_arch: arm64, kernel: 6.16 }
188192
steps:
189193
- name: Clone code
190194
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

libpf/generics.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ func MapKeysToSlice[K comparable, V any](m map[K]V) []K {
2424
return slice
2525
}
2626

27+
// MapKeysToSet creates a set from a map's keys.
28+
func MapKeysToSet[K comparable, V any](m map[K]V) Set[K] {
29+
set := make(Set[K], len(m))
30+
for key := range m {
31+
set[key] = Void{}
32+
}
33+
return set
34+
}
35+
2736
// SliceAllEqual checks whether all items in a slice have a given value.
2837
func SliceAllEqual[T comparable](s []T, value T) bool {
2938
for _, item := range s {

support/ebpf/sched_monitor.ebpf.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,25 @@
77
#include "types.h"
88

99
// See /sys/kernel/debug/tracing/events/sched/sched_process_free/format
10-
// for struct layout.
11-
struct sched_process_free_ctx {
10+
// for struct layout. This is the pre-6.16 format, which uses a fixed-size
11+
// (TASK_COMM_LEN) array for comm.
12+
struct sched_process_free_ctx_pre616 {
1213
unsigned char skip[24];
1314
pid_t pid;
1415
int prio;
1516
};
1617

17-
// tracepoint__sched_process_free is a tracepoint attached to the scheduler that frees processes.
18-
// Every time a processes exits this hook is triggered.
19-
SEC("tracepoint/sched/sched_process_free")
20-
int tracepoint__sched_process_free(struct sched_process_free_ctx *ctx)
21-
{
22-
u32 pid = ctx->pid;
18+
// This is the newer format used by kernel versions 6.16+.
19+
// The change was introduced upstream with
20+
// https://github.com/torvalds/linux/commit/155fd6c3e2f02efdc71a9b62888942efc217aff0
21+
struct sched_process_free_ctx {
22+
unsigned char skip[12];
23+
pid_t pid;
24+
int prio;
25+
};
2326

27+
static EBPF_INLINE int do_process_free(void *ctx, u32 pid)
28+
{
2429
if (!bpf_map_lookup_elem(&reported_pids, &pid) && !pid_information_exists(pid)) {
2530
// Only report PIDs that we explicitly track. This avoids sending kernel worker PIDs
2631
// to userspace.
@@ -33,3 +38,17 @@ int tracepoint__sched_process_free(struct sched_process_free_ctx *ctx)
3338
exit:
3439
return 0;
3540
}
41+
42+
// tracepoint__sched_process_free is a tracepoint attached to the scheduler that frees processes.
43+
// Every time a process exits, this hook is triggered.
44+
SEC("tracepoint/sched/sched_process_free/v2")
45+
int tracepoint__sched_process_free(struct sched_process_free_ctx *ctx)
46+
{
47+
return do_process_free(ctx, ctx->pid);
48+
}
49+
50+
SEC("tracepoint/sched/sched_process_free/v1")
51+
int tracepoint__sched_process_free_pre616(struct sched_process_free_ctx_pre616 *ctx)
52+
{
53+
return do_process_free(ctx, ctx->pid);
54+
}

support/ebpf/tracer.ebpf.amd64

5.47 KB
Binary file not shown.

support/ebpf/tracer.ebpf.arm64

5.46 KB
Binary file not shown.

tracer/tracepoints.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88

99
"github.com/cilium/ebpf"
1010
"github.com/cilium/ebpf/link"
11+
"go.opentelemetry.io/ebpf-profiler/libpf"
1112
"go.opentelemetry.io/ebpf-profiler/rlimit"
1213
)
1314

@@ -34,8 +35,8 @@ func (t *Tracer) AttachSchedMonitor() error {
3435
if err != nil {
3536
return fmt.Errorf("failed to adjust rlimit: %v", err)
3637
}
37-
defer restoreRlimit()
3838

39-
prog := t.ebpfProgs["tracepoint__sched_process_free"]
40-
return t.attachToTracepoint("sched", "sched_process_free", prog)
39+
defer restoreRlimit()
40+
name := schedProcessFreeHookName(libpf.MapKeysToSet(t.ebpfProgs))
41+
return t.attachToTracepoint("sched", "sched_process_free", t.ebpfProgs[name])
4142
}

tracer/tracer.go

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ const (
5656
probProfilingDisable = -1
5757
)
5858

59+
// Names of tracepoint hooks for sched_process_free. There are two hooks
60+
// as the tracepoint format has changed for kernel versions 6.16+.
61+
const (
62+
schedProcessFreeV1 = "tracepoint__sched_process_free_pre616"
63+
schedProcessFreeV2 = "tracepoint__sched_process_free"
64+
)
65+
5966
// Intervals is a subset of config.IntervalsAndTimers.
6067
type Intervals interface {
6168
MonitorInterval() time.Duration
@@ -174,6 +181,16 @@ func goString(cstr []byte) string {
174181
return strings.Clone(pfunsafe.ToString(cstr[:index]))
175182
}
176183

184+
// schedProcessFreeHookName returns the name of the tracepoint hook to use.
185+
// This function requires that exactly one of (schedProcessFreeV1, schedProcessFreeV2)
186+
// be present in progNames; schedProcessFreeV2 is returned whenever schedProcessFreeV1 is absent.
187+
func schedProcessFreeHookName(progNames libpf.Set[string]) string {
188+
if _, ok := progNames[schedProcessFreeV1]; ok {
189+
return schedProcessFreeV1
190+
}
191+
return schedProcessFreeV2
192+
}
193+
177194
// NewTracer loads eBPF code and map definitions from the ELF module at the configured path.
178195
func NewTracer(ctx context.Context, cfg *Config) (*Tracer, error) {
179196
kernelSymbolizer, err := kallsyms.NewSymbolizer()
@@ -282,11 +299,23 @@ func initializeMapsAndPrograms(kmod *kallsyms.Module, cfg *Config) (
282299
// References to eBPF maps in the eBPF programs are just placeholders that need to be
283300
// replaced by the actual loaded maps later on with RewriteMaps before loading the
284301
// programs into the kernel.
302+
major, minor, patch, err := GetCurrentKernelVersion()
303+
if err != nil {
304+
return nil, nil, fmt.Errorf("failed to get kernel version: %v", err)
305+
}
306+
285307
coll, err := support.LoadCollectionSpec()
286308
if err != nil {
287309
return nil, nil, fmt.Errorf("failed to load specification for tracers: %v", err)
288310
}
289311

312+
if major > 6 || (major == 6 && minor >= 16) {
313+
// The tracepoint format for sched_process_free changed in v6.16, so drop the pre-6.16 program.
314+
delete(coll.Programs, schedProcessFreeV1)
315+
} else {
316+
delete(coll.Programs, schedProcessFreeV2)
317+
}
318+
290319
if cfg.VerboseMode {
291320
if err = coll.Variables["with_debug_output"].Set(uint32(1)); err != nil {
292321
return nil, nil, fmt.Errorf("failed to set debug output: %v", err)
@@ -316,11 +345,6 @@ func initializeMapsAndPrograms(kmod *kallsyms.Module, cfg *Config) (
316345
}
317346

318347
if cfg.KernelVersionCheck {
319-
var major, minor, patch uint32
320-
major, minor, patch, err = GetCurrentKernelVersion()
321-
if err != nil {
322-
return nil, nil, fmt.Errorf("failed to get kernel version: %v", err)
323-
}
324348
if hasProbeReadBug(major, minor, patch) {
325349
if err = checkForMaccessPatch(coll, ebpfMaps, kmod); err != nil {
326350
return nil, nil, fmt.Errorf("your kernel version %d.%d.%d may be "+
@@ -540,9 +564,11 @@ func loadPerfUnwinders(coll *cebpf.CollectionSpec, ebpfProgs map[string]*cebpf.P
540564

541565
progs := make([]progLoaderHelper, len(tailCallProgs)+2)
542566
copy(progs, tailCallProgs)
567+
568+
schedProcessFree := schedProcessFreeHookName(libpf.MapKeysToSet(coll.Programs))
543569
progs = append(progs,
544570
progLoaderHelper{
545-
name: "tracepoint__sched_process_free",
571+
name: schedProcessFree,
546572
noTailCallTarget: true,
547573
enable: true,
548574
},

0 commit comments

Comments
 (0)