From 96bc212285a79b08b8035ab5bc7a6ca5d99b2306 Mon Sep 17 00:00:00 2001 From: bitsandfoxes Date: Wed, 13 May 2026 18:33:18 +0200 Subject: [PATCH] poc --- CMakeLists.txt | 10 +- include/sentry.h | 21 ++ .../main/java/io/sentry/ndk/SentryNdk.java | 20 ++ ndk/lib/src/main/jni/sentry.c | 26 +++ src/CMakeLists.txt | 7 + src/sentry_thread_sampler.c | 216 ++++++++++++++++++ tests/unit/CMakeLists.txt | 1 + tests/unit/test_thread_sampler.c | 72 ++++++ tests/unit/tests.inc | 4 + vendor/libunwind/CMakeLists.txt | 8 +- 10 files changed, 382 insertions(+), 3 deletions(-) create mode 100644 src/sentry_thread_sampler.c create mode 100644 tests/unit/test_thread_sampler.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 2de2efb745..cccf8faa48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -297,6 +297,10 @@ endif() if(ANDROID) set(SENTRY_WITH_LIBUNWINDSTACK TRUE) + # libunwind is also enabled on Android so the cross-thread sampler + # (sentry_unwind_thread_stack) has an async-signal-safe unwinder. + # libunwindstack allocates and is unsafe from a signal handler. + set(SENTRY_WITH_LIBUNWIND TRUE) elseif(LINUX) set(SENTRY_WITH_LIBUNWIND TRUE) elseif(APPLE) @@ -669,8 +673,10 @@ if(SENTRY_WITH_LIBUNWINDSTACK) endif() if(SENTRY_WITH_LIBUNWIND) - if(LINUX) - # Use vendored libunwind + if(LINUX OR ANDROID) + # Use vendored libunwind (on Android too: matches Linux behaviour and + # avoids relying on the NDK toolchain's libunwind, whose API can drift + # between NDK versions). 
add_subdirectory(vendor/libunwind) target_link_libraries(sentry PRIVATE unwind) if(NOT SENTRY_BUILD_SHARED_LIBS) diff --git a/include/sentry.h b/include/sentry.h index 63fa2720c4..6ad6442914 100644 --- a/include/sentry.h +++ b/include/sentry.h @@ -707,6 +707,27 @@ SENTRY_EXPERIMENTAL_API size_t sentry_unwind_stack( SENTRY_EXPERIMENTAL_API size_t sentry_unwind_stack_from_ucontext( const sentry_ucontext_t *uctx, void **stacktrace_out, size_t max_len); +/** + * Captures a stacktrace from another thread by Linux kernel thread ID (TID). + * + * Uses signal-based sampling: a real-time signal is sent to the target thread, + * and the thread's stack is unwound from the signal context. The function + * blocks until the sample completes or times out (1 second). + * + * Linux and Android only. Other platforms return 0. + * + * Concurrent calls are serialized internally; only one sample runs at a time. + * + * @param tid Linux kernel TID of the target thread (e.g. from gettid() or + * android.os.Process.myTid()). + * @param stacktrace_out Caller-provided buffer for instruction pointers. + * @param max_len Capacity of stacktrace_out. + * @return Number of frames written. 0 on failure (invalid TID, signal delivery + * failure, timeout, or unsupported platform). + */ +SENTRY_EXPERIMENTAL_API size_t sentry_unwind_thread_stack( + int tid, void **stacktrace_out, size_t max_len); + /** * A UUID */ diff --git a/ndk/lib/src/main/java/io/sentry/ndk/SentryNdk.java b/ndk/lib/src/main/java/io/sentry/ndk/SentryNdk.java index 2369c11042..0f03b70898 100644 --- a/ndk/lib/src/main/java/io/sentry/ndk/SentryNdk.java +++ b/ndk/lib/src/main/java/io/sentry/ndk/SentryNdk.java @@ -22,6 +22,8 @@ private SentryNdk() {} private static native void shutdown(); + private static native long[] captureThreadStackNative(long tid); + /** * Preloads sentry-native into the process signal chain before full * initialization. 
@@ -63,6 +65,24 @@ public static void close() { shutdown(); } + /** + * Captures the native stack of another thread by Linux kernel TID. + * + * Uses signal-based sampling internally. Returns instruction-pointer + * addresses as longs; an empty array indicates sampling failure + * (invalid TID, signal delivery failure, timeout, or unsupported platform). + * + *

Linux/Android only. Other platforms return an empty array.
+   *
+   * @param tid Linux kernel TID of the target thread (e.g. android.os.Process.myTid()).
+   * @return array of instruction-pointer addresses (up to 128 frames), or empty on failure.
+   */
+  public static long[] captureThreadStack(final long tid) {
+    loadNativeLibraries();
+    final long[] result = captureThreadStackNative(tid);
+    return result != null ? result : new long[0];
+  }
+
   /**
    * Loads all required native libraries. This is automatically done by {@link #init(NdkOptions)},
    * but can be called manually in case you want to preload the libraries before calling #init.
diff --git a/ndk/lib/src/main/jni/sentry.c b/ndk/lib/src/main/jni/sentry.c
index c64f005655..1b2071a29d 100644
--- a/ndk/lib/src/main/jni/sentry.c
+++ b/ndk/lib/src/main/jni/sentry.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -575,3 +576,37 @@ JNIEXPORT void JNICALL
 Java_io_sentry_ndk_SentryNdk_shutdown(JNIEnv *env, jclass cls) {
     sentry_close();
 }
+
+JNIEXPORT jlongArray JNICALL
+Java_io_sentry_ndk_SentryNdk_captureThreadStackNative(JNIEnv *env, jclass cls, jlong tid) {
+    (void)cls;
+    enum { MAX_FRAMES = 128 };
+    void *frames[MAX_FRAMES];
+
+    // jlong is 64-bit but the native API takes an int TID: reject values that
+    // would not survive the narrowing cast instead of sampling another thread.
+    size_t count = 0;
+    if (tid > 0 && tid <= (jlong)0x7fffffff) {
+        count = sentry_unwind_thread_stack((int)tid, frames, MAX_FRAMES);
+    }
+    if (count > (size_t)MAX_FRAMES) {
+        // Defensive: never index past the local buffers below.
+        count = (size_t)MAX_FRAMES;
+    }
+
+    jlongArray result = (*env)->NewLongArray(env, (jsize)count);
+    if (!result) {
+        return NULL;
+    }
+    if (count == 0) {
+        return result;
+    }
+
+    // Copy via a small stack buffer so we don't depend on sizeof(void*) == sizeof(jlong)
+    jlong buf[MAX_FRAMES];
+    for (size_t i = 0; i < count; i++) {
+        buf[i] = (jlong)(uintptr_t)frames[i];
+    }
+    (*env)->SetLongArrayRegion(env, result, 0, (jsize)count, buf);
+    return result;
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6086dbaafb..ff5678c11e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -240,6 +240,13 @@ if(SENTRY_WITH_LIBUNWIND)
 	)
 endif()
 
+# Cross-thread stack sampler (sentry_unwind_thread_stack). 
Self-gates to
+# Linux + libunwind at compile time; on other platforms the file compiles to
+# a no-op stub, so we add it unconditionally.
+sentry_target_sources_cwd(sentry
+	sentry_thread_sampler.c
+)
+
 if(SENTRY_WITH_LIBUNWIND_MAC)
 	target_compile_definitions(sentry PRIVATE SENTRY_WITH_UNWINDER_LIBUNWIND_MAC)
 	sentry_target_sources_cwd(sentry
diff --git a/src/sentry_thread_sampler.c b/src/sentry_thread_sampler.c
new file mode 100644
index 0000000000..86c29c6268
--- /dev/null
+++ b/src/sentry_thread_sampler.c
@@ -0,0 +1,247 @@
+#include "sentry_boot.h"
+
+#include <stddef.h>
+
+#if (defined(__linux__) || defined(__ANDROID__))                               \
+    && defined(SENTRY_WITH_UNWINDER_LIBUNWIND)
+#    define SENTRY_THREAD_SAMPLER_SUPPORTED 1
+#else
+#    define SENTRY_THREAD_SAMPLER_SUPPORTED 0
+#endif
+
+#if SENTRY_THREAD_SAMPLER_SUPPORTED
+
+#    include <errno.h>
+#    include <pthread.h>
+#    include <semaphore.h>
+#    include <signal.h>
+#    include <stdint.h>
+#    include <string.h>
+#    include <sys/syscall.h>
+#    include <sys/types.h>
+#    include <time.h>
+#    include <unistd.h>
+
+#    define UNW_LOCAL_ONLY
+#    include <libunwind.h>
+
+/*
+ * Real-time signal used for asynchronous stack sampling.
+ *
+ * `SIGRTMIN + 5` is chosen because:
+ * - real-time signals (>= SIGRTMIN) are queued, not coalesced, and are not
+ *   used by libc itself, so they will not collide with internal C library
+ *   machinery (e.g. NPTL uses SIGRTMIN .. SIGRTMIN+2 on glibc, and Bionic
+ *   reserves a similar low range for its own thread plumbing);
+ * - +5 matches the offset that async-profiler uses for the same purpose,
+ *   which avoids stepping on common application-side users of low real-time
+ *   signal slots.
+ *
+ * NOTE: the actual value of SIGRTMIN is only known at runtime on glibc/Bionic
+ * (it is a function call expanding to libc internals), so we cannot use it in
+ * a `case` label and must compute it at handler-install time.
+ */
+#    define SENTRY_SAMPLER_SIGNAL (SIGRTMIN + 5)
+
+/*
+ * Capacity of the sampler's private frame buffer. The signal handler only
+ * ever writes into this static storage, never into the caller's buffer, so a
+ * handler that is still running (or only starts running) after its request
+ * timed out cannot write through a pointer the caller no longer owns.
+ */
+#    define SENTRY_SAMPLER_MAX_FRAMES ((size_t)512)
+
+static pthread_mutex_t g_sampler_lock = PTHREAD_MUTEX_INITIALIZER;
+static sem_t g_sampler_done;
+static void *g_sampler_frames[SENTRY_SAMPLER_MAX_FRAMES];
+static volatile size_t g_sampler_out_max;
+static volatile size_t g_sampler_out_written;
+static volatile int g_sampler_initialized = 0;
+
+/*
+ * TID the currently active sampling request is expecting. Set by the caller
+ * before `tgkill` (under `g_sampler_lock`) and consulted inside the signal
+ * handler to discard stale signals delivered after a previous sampling request
+ * timed out. Real-time signals are queued, not coalesced, so a target thread
+ * that was blocked when we sent it the original signal may eventually run our
+ * handler at an arbitrarily later time — potentially while another sampling
+ * request targeting a different thread is in flight. Without this guard, the
+ * stale handler would report the wrong thread's stack as the new request's
+ * result.
+ */
+static volatile int g_expected_tid = 0;
+
+/*
+ * Signal handler running on the *target* thread's stack. Must be strictly
+ * async-signal-safe: no malloc, no logging, no mutex acquisition.
+ *
+ * We unwind from the saved ucontext using libunwind's
+ * `UNW_INIT_SIGNAL_FRAME` mode (same pattern as
+ * `sentry__unwind_stack_libunwind` for the crash path), write IPs into the
+ * sampler's static frame buffer, then signal completion via `sem_post`, which
+ * POSIX mandates be async-signal-safe. `errno` is saved on entry and restored
+ * on every exit so the interrupted code never sees a value clobbered here.
+ */
+static void
+sentry__sampler_signal_handler(int sig, siginfo_t *info, void *ucontext_v)
+{
+    (void)sig;
+    (void)info;
+
+    const int saved_errno = errno;
+
+    // Stale-signal guard: if our TID doesn't match the request currently in
+    // flight, return silently without posting. Publishing a sample here would
+    // corrupt the active request's result. Not posting is safe — the active
+    // request's tgkill will produce its own (correctly-targeted) handler
+    // invocation, and `sem_trywait` drains any leftover posts at the start of
+    // each request.
+    const int my_tid = (int)syscall(SYS_gettid);
+    if (my_tid != g_expected_tid) {
+        errno = saved_errno;
+        return;
+    }
+
+    // Read the capacity once: the requester resets it when it gives up, which
+    // can happen while this handler is still unwinding.
+    size_t max_frames = g_sampler_out_max;
+    if (max_frames > SENTRY_SAMPLER_MAX_FRAMES) {
+        max_frames = SENTRY_SAMPLER_MAX_FRAMES;
+    }
+
+    size_t written = 0;
+    if (max_frames > 0 && ucontext_v) {
+        unw_cursor_t cursor;
+        if (unw_init_local2(&cursor, (unw_context_t *)ucontext_v,
+                UNW_INIT_SIGNAL_FRAME)
+            == 0) {
+            unw_word_t prev_ip = 0;
+            unw_word_t prev_sp = 0;
+            int have_prev = 0;
+            for (;;) {
+                unw_word_t ip = 0;
+                if (unw_get_reg(&cursor, UNW_REG_IP, &ip) != 0) {
+                    break;
+                }
+                unw_word_t sp = 0;
+                (void)unw_get_reg(&cursor, UNW_REG_SP, &sp);
+
+                // Stop on lack of progress (mirrors the crash unwinder).
+                if (have_prev && ip == prev_ip && sp == prev_sp) {
+                    break;
+                }
+
+                g_sampler_frames[written++] = (void *)(uintptr_t)ip;
+                if (written >= max_frames) {
+                    break;
+                }
+
+                prev_ip = ip;
+                prev_sp = sp;
+                have_prev = 1;
+
+                if (unw_step(&cursor) <= 0) {
+                    break;
+                }
+            }
+        }
+    }
+
+    // Publish only if the request is still waiting for this thread; it may
+    // have timed out while we were unwinding.
+    if (my_tid == g_expected_tid) {
+        g_sampler_out_written = written;
+        // sem_post is in the POSIX async-signal-safe list.
+        sem_post(&g_sampler_done);
+    }
+    errno = saved_errno;
+}
+
+#endif // SENTRY_THREAD_SAMPLER_SUPPORTED
+
+size_t
+sentry_unwind_thread_stack(int tid, void **stacktrace_out, size_t max_len)
+{
+#if !SENTRY_THREAD_SAMPLER_SUPPORTED
+    (void)tid;
+    (void)stacktrace_out;
+    (void)max_len;
+    return 0;
+#else
+    if (!stacktrace_out || max_len == 0 || tid <= 0) {
+        return 0;
+    }
+
+    pthread_mutex_lock(&g_sampler_lock);
+
+    if (!g_sampler_initialized) {
+        if (sem_init(&g_sampler_done, 0, 0) != 0) {
+            pthread_mutex_unlock(&g_sampler_lock);
+            return 0;
+        }
+
+        struct sigaction sa;
+        memset(&sa, 0, sizeof(sa));
+        sa.sa_sigaction = sentry__sampler_signal_handler;
+        sa.sa_flags = SA_SIGINFO | SA_RESTART;
+        sigemptyset(&sa.sa_mask);
+
+        // Any previous disposition is intentionally overwritten and not
+        // chained; this signal slot is owned by the sampler for the lifetime
+        // of the process.
+        if (sigaction(SENTRY_SAMPLER_SIGNAL, &sa, NULL) != 0) {
+            sem_destroy(&g_sampler_done);
+            pthread_mutex_unlock(&g_sampler_lock);
+            return 0;
+        }
+        g_sampler_initialized = 1;
+    }
+
+    // Drain any spurious posts from a previous timed-out sample, so that the
+    // wait below cannot return prematurely on a stale token. `g_expected_tid`
+    // is still 0 here, so a stale handler starting now bails out unposted.
+    while (sem_trywait(&g_sampler_done) == 0) {
+        // discard
+    }
+
+    g_sampler_out_max = max_len < SENTRY_SAMPLER_MAX_FRAMES
+        ? max_len
+        : SENTRY_SAMPLER_MAX_FRAMES;
+    g_sampler_out_written = 0;
+    g_expected_tid = tid;
+
+    pid_t my_pid = getpid();
+    if (syscall(SYS_tgkill, my_pid, tid, SENTRY_SAMPLER_SIGNAL) != 0) {
+        g_sampler_out_max = 0;
+        g_expected_tid = 0;
+        pthread_mutex_unlock(&g_sampler_lock);
+        return 0;
+    }
+
+    // Bounded wait — 1 second max.
+    struct timespec timeout;
+    clock_gettime(CLOCK_REALTIME, &timeout);
+    timeout.tv_sec += 1;
+
+    int rc;
+    do {
+        rc = sem_timedwait(&g_sampler_done, &timeout);
+    } while (rc == -1 && errno == EINTR);
+
+    // Stop accepting handler results before reading the sample out.
+    g_expected_tid = 0;
+    g_sampler_out_max = 0;
+
+    size_t result = 0;
+    if (rc == 0) {
+        result = g_sampler_out_written;
+        if (result > max_len) {
+            result = max_len;
+        }
+        memcpy(stacktrace_out, g_sampler_frames, result * sizeof(void *));
+    }
+
+    pthread_mutex_unlock(&g_sampler_lock);
+    return result;
+#endif
+}
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
index a143fd540a..1079a128f1 100644
--- a/tests/unit/CMakeLists.txt
+++ b/tests/unit/CMakeLists.txt
@@ -53,6 +53,7 @@ add_executable(sentry_test_unit
 	test_string.c
 	test_symbolizer.c
 	test_sync.c
+	test_thread_sampler.c
 	test_tracing.c
 	test_tus.c
 	test_uninit.c
diff --git a/tests/unit/test_thread_sampler.c b/tests/unit/test_thread_sampler.c
new file mode 100644
index 0000000000..7cc0306d8a
--- /dev/null
+++ b/tests/unit/test_thread_sampler.c
@@ -0,0 +1,72 @@
+#include "sentry_testsupport.h"
+
+#if defined(__linux__) || defined(__ANDROID__)
+#    include <sys/syscall.h>
+#    include <unistd.h>
+
+SENTRY_TEST(thread_sampler_samples_self)
+{
+    void *frames[32];
+    pid_t tid = (pid_t)syscall(SYS_gettid);
+    size_t n = sentry_unwind_thread_stack((int)tid, frames, 32);
+    // With libunwind compiled in (Linux, and Android as of this change) this
+    // should yield >= 1 frame; builds without libunwind get the stub that
+    // returns 0, so we only assert the bounds here.
+    TEST_CHECK(n <= 32);
+}
+
+SENTRY_TEST(thread_sampler_rejects_invalid_tid)
+{
+    void *frames[32];
+    size_t n = sentry_unwind_thread_stack(-1, frames, 32);
+    TEST_CHECK_INT_EQUAL((int)n, 0);
+
+    n = sentry_unwind_thread_stack(0, frames, 32);
+    TEST_CHECK_INT_EQUAL((int)n, 0);
+}
+
+SENTRY_TEST(thread_sampler_rejects_null_buf)
+{
+    size_t n = sentry_unwind_thread_stack(1, NULL, 32);
+    TEST_CHECK_INT_EQUAL((int)n, 0);
+}
+
+SENTRY_TEST(thread_sampler_rejects_zero_max)
+{
+    void *frames[1];
+    size_t n = sentry_unwind_thread_stack(1, frames, 0);
+    TEST_CHECK_INT_EQUAL((int)n, 0);
+}
+
+#else // non-Linux/Android: function must be a no-op returning 0.
+ +SENTRY_TEST(thread_sampler_samples_self) +{ + void *frames[32]; + size_t n = sentry_unwind_thread_stack(1, frames, 32); + TEST_CHECK_INT_EQUAL((int)n, 0); +} + +SENTRY_TEST(thread_sampler_rejects_invalid_tid) +{ + void *frames[32]; + size_t n = sentry_unwind_thread_stack(-1, frames, 32); + TEST_CHECK_INT_EQUAL((int)n, 0); + n = sentry_unwind_thread_stack(0, frames, 32); + TEST_CHECK_INT_EQUAL((int)n, 0); +} + +SENTRY_TEST(thread_sampler_rejects_null_buf) +{ + size_t n = sentry_unwind_thread_stack(1, NULL, 32); + TEST_CHECK_INT_EQUAL((int)n, 0); +} + +SENTRY_TEST(thread_sampler_rejects_zero_max) +{ + void *frames[1]; + size_t n = sentry_unwind_thread_stack(1, frames, 0); + TEST_CHECK_INT_EQUAL((int)n, 0); +} + +#endif diff --git a/tests/unit/tests.inc b/tests/unit/tests.inc index 5829f94deb..8bd6f4595d 100644 --- a/tests/unit/tests.inc +++ b/tests/unit/tests.inc @@ -304,6 +304,10 @@ XX(stringbuilder_append_overflow) XX(stringbuilder_reserve_overflow) XX(symbolizer) XX(task_queue) +XX(thread_sampler_rejects_invalid_tid) +XX(thread_sampler_rejects_null_buf) +XX(thread_sampler_rejects_zero_max) +XX(thread_sampler_samples_self) XX(thread_without_name_still_valid) XX(trace_continuation_truth_table) XX(traceparent_header_disabled_by_default) diff --git a/vendor/libunwind/CMakeLists.txt b/vendor/libunwind/CMakeLists.txt index 6a684f0c95..a5698c2e74 100644 --- a/vendor/libunwind/CMakeLists.txt +++ b/vendor/libunwind/CMakeLists.txt @@ -288,4 +288,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang") target_compile_options(unwind PRIVATE -w) endif() -target_link_libraries(unwind PRIVATE ${CMAKE_DL_LIBS} pthread) +if(ANDROID) + # On Android, pthread is part of libc; there is no separate libpthread.so. + # libdl is similarly merged, so ${CMAKE_DL_LIBS} expands to empty. + target_link_libraries(unwind PRIVATE ${CMAKE_DL_LIBS}) +else() + target_link_libraries(unwind PRIVATE ${CMAKE_DL_LIBS} pthread) +endif()