3232#include " tscore/BaseLogFile.h"
3333#include " tscore/runroot.h"
3434#include " iocore/eventsystem/RecProcess.h"
35+ #include < csignal>
3536#include < unistd.h>
3637
namespace
{
// Upper bound, in seconds, on how long backtrace collection may run before
// SIGALRM fires. Guards the crashlog helper against blocking forever if
// ptrace/waitpid hangs.
constexpr unsigned int BACKTRACE_TIMEOUT_SECS = 10;

// Raised by the SIGALRM handler below and polled by the code that armed the
// alarm. volatile sig_atomic_t is the type that may be written from a signal
// handler.
volatile sig_atomic_t backtrace_timed_out = 0;

// SIGALRM handler: only records that the timeout elapsed. The signal number
// is not needed.
void
backtrace_alarm_handler(int)
{
  backtrace_timed_out = 1;
}
} // namespace
52+
// Run-mode switches, all off by default. debug_mode and syslog_mode are
// reported in the "crashlog started" note emitted by main().
// NOTE(review): these are int rather than bool, presumably because the
// argument parser stores into int -- confirm before changing the type.
static int syslog_mode = false;
static int debug_mode  = false;
static int wait_mode   = false;
@@ -94,29 +110,58 @@ crashlog_open(const char *path)
94110extern int ServerBacktrace (unsigned /* options */ , int pid, char **trace);
95111
96112bool
97- crashlog_write_backtrace (FILE *fp, pid_t pid, const crashlog_target &)
113+ crashlog_write_backtrace (FILE *fp, pid_t pid, const crashlog_target &target )
98114{
99- char *trace = nullptr ;
100- int mgmterr;
115+ char *trace = nullptr ;
116+ int mgmterr = -1 ;
117+
118+ // Set up a timeout to prevent indefinite hangs in ptrace/waitpid.
119+ backtrace_timed_out = 0 ;
120+ struct sigaction new_action;
121+ struct sigaction old_action;
122+ new_action.sa_handler = backtrace_alarm_handler;
123+ sigemptyset (&new_action.sa_mask );
124+ new_action.sa_flags = 0 ;
125+ sigaction (SIGALRM, &new_action, &old_action);
126+ alarm (BACKTRACE_TIMEOUT_SECS);
127+
128+ mgmterr = ServerBacktrace (0 , static_cast <int >(pid), &trace);
129+
130+ // Cancel the alarm and restore the old handler.
131+ alarm (0 );
132+ sigaction (SIGALRM, &old_action, nullptr );
133+
134+ if (backtrace_timed_out) {
135+ fprintf (fp, " Backtrace collection timed out after %u seconds\n " , BACKTRACE_TIMEOUT_SECS);
136+ free (trace);
137+ return false ;
138+ }
101139
102140 // NOTE: sometimes we can't get a backtrace because the ptrace attach will fail with
103141 // EPERM. I've seen this happen when a debugger is attached, which makes sense, but it
104142 // can also happen without a debugger. Possibly in that case, there is a race with the
105143 // kernel locking the process information?
106144
107- if ((mgmterr = ServerBacktrace (0 , static_cast <int >(pid), &trace)) != 0 ) {
108- fprintf (fp, " Unable to retrieve backtrace: %d\n " , mgmterr);
109- return false ;
145+ if (mgmterr == 0 && trace != nullptr ) {
146+ // ServerBacktrace succeeded - this gives us all threads' backtraces.
147+ fprintf (fp, " %s" , trace);
148+ free (trace);
149+ return true ;
110150 }
111151
112- if (trace == nullptr ) {
113- fprintf (fp, " Unable to retrieve backtrace: trace is null\n " );
114- return false ;
152+ // ServerBacktrace failed. Fall back to the in-process backtrace from the crashing thread.
153+ if ((target.flags & CRASHLOG_HAVE_BACKTRACE) && !target.backtrace .empty ()) {
154+ fprintf (fp, " Crashing Thread Backtrace:\n %s" , target.backtrace .c_str ());
155+ return true ;
115156 }
116157
117- fprintf (fp, " %s" , trace);
118- free (trace);
119- return true ;
158+ // No backtrace available from either source.
159+ if (mgmterr != 0 ) {
160+ fprintf (fp, " Unable to retrieve backtrace: ServerBacktrace returned %d\n " , mgmterr);
161+ } else {
162+ fprintf (fp, " Unable to retrieve backtrace: no backtrace data available\n " );
163+ }
164+ return false ;
120165}
121166
122167void
@@ -200,11 +245,12 @@ main(int /* argc ATS_UNUSED */, const char **argv)
200245 Note (" crashlog started, target=%ld, debug=%s syslog=%s, uid=%ld euid=%ld" , static_cast <long >(target_pid),
201246 debug_mode ? " true" : " false" , syslog_mode ? " true" : " false" , (long )getuid (), (long )geteuid ());
202247
203- ink_zero (target);
204248 target.pid = static_cast <pid_t >(target_pid);
205249 target.timestamp = timestamp ();
206250
207- if (host_triplet && strncmp (host_triplet, " x86_64-unknown-linux" , sizeof (" x86_64-unknown-linux" ) - 1 ) == 0 ) {
251+ // Read crash context on Linux platforms. The siginfo_t and ucontext_t
252+ // structures are platform-specific but defined for all Linux architectures.
253+ if (host_triplet && (strstr (host_triplet, " linux" ) != nullptr || strstr (host_triplet, " Linux" ) != nullptr )) {
208254 ssize_t nbytes;
209255 target.flags |= CRASHLOG_HAVE_THREADINFO;
210256
@@ -219,6 +265,21 @@ main(int /* argc ATS_UNUSED */, const char **argv)
219265 Warning (" received %zd of %zu expected thread context bytes" , nbytes, sizeof (target.ucontext ));
220266 target.flags &= ~CRASHLOG_HAVE_THREADINFO;
221267 }
268+
269+ // Read the in-process backtrace from the crashing thread.
270+ uint32_t bt_len = 0 ;
271+ nbytes = read (STDIN_FILENO, &bt_len, sizeof (bt_len));
272+ if (nbytes == static_cast <ssize_t >(sizeof (bt_len)) && bt_len > 0 && bt_len < 1024 * 1024 ) {
273+ target.backtrace .resize (bt_len);
274+ nbytes = read (STDIN_FILENO, target.backtrace .data (), bt_len);
275+ if (nbytes == static_cast <ssize_t >(bt_len)) {
276+ target.flags |= CRASHLOG_HAVE_BACKTRACE;
277+ Note (" received %u bytes of in-process backtrace" , bt_len);
278+ } else {
279+ Warning (" received %zd of %u expected backtrace bytes" , nbytes, bt_len);
280+ target.backtrace .clear ();
281+ }
282+ }
222283 }
223284
224285 logname = crashlog_name ();
0 commit comments