@@ -21,12 +21,56 @@ struct trace_event_raw_page_fault_user {
2121 char __data [0 ];
2222};
2323
24+ struct cr2_stat {
25+ __u64 cr2 ;
26+ __u64 err ;
27+ __u64 tai ;
28+ };
29+
30+ struct cr2_stats {
31+ struct cr2_stat stat [MAX_USER_PF_ENTRIES ];
32+ __u64 head ;
33+ __u64 count ;
34+ };
35+
2436struct {
2537 __uint (type , BPF_MAP_TYPE_HASH );
2638 __uint (max_entries , 1024 );
2739 __type (key , u32 );
28- __type (value , u64 );
40+ __type (value , struct cr2_stats );
2941} tgid_cr2 SEC (".maps" );
42+
43+ inline void cr2stats_init (struct cr2_stats * stats ) {
44+ stats -> head = 0 ;
45+ stats -> count = 0 ;
46+ }
47+
48+ inline void cr2stats_push (struct cr2_stats * stats , struct cr2_stat * value ) {
49+ if (stats -> head < MAX_USER_PF_ENTRIES ) {
50+ stats -> stat [stats -> head ] = * value ;
51+
52+ if (++ stats -> head >= MAX_USER_PF_ENTRIES )
53+ stats -> head -= MAX_USER_PF_ENTRIES ;
54+
55+ if (++ stats -> count > MAX_USER_PF_ENTRIES )
56+ stats -> count = MAX_USER_PF_ENTRIES ;
57+ }
58+ }
59+
60+ inline struct cr2_stat * cr2stats_get (struct cr2_stats * stats , u32 index ) {
61+ if (stats -> count == MAX_USER_PF_ENTRIES ) {
62+ index += stats -> head ;
63+ if (index >= MAX_USER_PF_ENTRIES ) {
64+ index -= MAX_USER_PF_ENTRIES ;
65+ }
66+ }
67+
68+ if (index < MAX_USER_PF_ENTRIES ) {
69+ return stats -> stat + index ;
70+ }
71+
72+ return NULL ;
73+ }
3074#endif
3175
3276// Output map (for user space)
@@ -75,24 +119,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
75119 bpf_probe_read_kernel_str (& event -> tgleader_comm , sizeof (event -> tgleader_comm ), & task -> group_leader -> comm );
76120 // TODO: can the acquisition of pidns_tgid, pidns_pid be made more robust / simplified?
77121 {
78- struct pid const * thread_pid = task -> thread_pid ;
79- unsigned int const level = thread_pid -> level ;
80- // thread_pid->numbers is a size-one flexible array member (type numbers[1])
81- // => cannot perform bounds-check against BTF information
82- // => need bpf_probe_read_kernel to read from indices potentially > 1
83- struct upid const * upid_inv = & thread_pid -> numbers [level ];
84- event -> pidns_pid = BPF_CORE_READ (upid_inv , nr ); // we already have implicit CO-RE, but we need the probe function call
85- }
86- {
87- struct pid const * tgid_pid = task -> signal -> pids [PIDTYPE_TGID ];
88- unsigned int const level = tgid_pid -> level ;
89- struct upid const * tgid_upid_inv = & tgid_pid -> numbers [level ];
90- // TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
91- // TODO: don't we need RCU here?
92- event -> pidns_tgid = BPF_CORE_READ (tgid_upid_inv , nr );
93- }
94-
95- event -> regs .trapno = task -> thread .trap_nr ; // TODO: also copy the other fields like cr2 and error_code
122+ struct pid const * thread_pid = task -> thread_pid ;
123+ unsigned int const level = thread_pid -> level ;
124+ // thread_pid->numbers is a size-one flexible array member (type numbers[1])
125+ // => cannot perform bounds-check against BTF information
126+ // => need bpf_probe_read_kernel to read from indices potentially > 1
127+ struct upid const * upid_inv = & thread_pid -> numbers [level ];
128+ event -> pidns_pid = BPF_CORE_READ (upid_inv , nr ); // we already have implicit CO-RE, but we need the probe function call
129+ }
130+ {
131+ struct pid const * tgid_pid = task -> signal -> pids [PIDTYPE_TGID ];
132+ unsigned int const level = tgid_pid -> level ;
133+ struct upid const * tgid_upid_inv = & tgid_pid -> numbers [level ];
134+ // TODO: doesn't this return the pid in the NS of the tg leader, instead of the pid in the NS of the current thread?
135+ // TODO: don't we need RCU here?
136+ event -> pidns_tgid = BPF_CORE_READ (tgid_upid_inv , nr );
137+ }
138+
139+ event -> regs .trapno = task -> thread .trap_nr ;
96140 event -> regs .err = task -> thread .error_code ;
97141
98142 // TODO: how are these regs acquired?
@@ -119,14 +163,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
119163 event -> regs .flags = regs -> flags ;
120164
121165 event -> regs .cr2 = task -> thread .cr2 ;
122- event -> regs . cr2_fault = -1 ;
166+ event -> cr2_userpf_entry_count = 0 ;
123167
124168 #ifdef TRACE_PF_CR2
125169 u32 tgid = task -> tgid ;
126- u64 * cr2 = bpf_map_lookup_elem (& tgid_cr2 , & tgid );
170+ struct cr2_stats * cr2stats = bpf_map_lookup_elem (& tgid_cr2 , & tgid );
171+
172+ if (cr2stats ) {
173+ for (u32 i = 0 ; i < cr2stats -> count && i < MAX_USER_PF_ENTRIES ; i ++ ) {
174+ struct cr2_stat * stat = cr2stats_get (cr2stats , i );
175+ if (stat ) {
176+ event -> regs .cr2_faults [i ] = stat -> cr2 ;
177+ event -> regs .cr2_errors [i ] = stat -> err ;
178+ event -> cr2_tai [i ] = stat -> tai ;
179+
180+ ++ event -> cr2_userpf_entry_count ;
181+ }
182+ }
127183
128- if (cr2 ) {
129- event -> regs .cr2_fault = * cr2 ;
130184 bpf_map_delete_elem (& tgid_cr2 , & tgid );
131185 }
132186 #endif
@@ -149,13 +203,24 @@ int trace_sigsegv(struct trace_event_raw_signal_generate *ctx) {
149203#ifdef TRACE_PF_CR2
150204SEC ("tracepoint/exceptions/page_fault_user" )
151205int trace_page_fault (struct trace_event_raw_page_fault_user * ctx ) {
152- u64 cr2 ;
206+ struct cr2_stat stat ;
153207 u32 tgid ;
154208
155- cr2 = ctx -> address ;
209+ stat .cr2 = ctx -> address ;
210+ stat .err = ctx -> error_code ;
211+ stat .tai = bpf_ktime_get_tai_ns ();
156212 tgid = bpf_get_current_pid_tgid () >> 32 ;
157213
158- bpf_map_update_elem (& tgid_cr2 , & tgid , & cr2 , BPF_ANY );
214+ struct cr2_stats * cr2stats = bpf_map_lookup_elem (& tgid_cr2 , & tgid );
215+ if (cr2stats ) {
216+ cr2stats_push (cr2stats , & stat );
217+ } else {
218+ struct cr2_stats new_stats ;
219+ cr2stats_init (& new_stats );
220+ cr2stats_push (& new_stats , & stat );
221+
222+ bpf_map_update_elem (& tgid_cr2 , & tgid , & new_stats , BPF_ANY );
223+ }
159224
160225 return 0 ;
161226}
0 commit comments