diff --git a/.github/workflows/prof_asan.yml b/.github/workflows/prof_asan.yml index 78019c658d..f936cb9831 100644 --- a/.github/workflows/prof_asan.yml +++ b/.github/workflows/prof_asan.yml @@ -127,6 +127,25 @@ jobs: cargo build --profile profiler-release cp -v "$CARGO_TARGET_DIR/profiler-release/libdatadog_php_profiling.so" "$(php-config --extension-dir)/datadog-profiling.so" + # TODO(parallel): the php-8.5_bookworm-8 image ships parallel 1.2.13, which + # has a bug that intermittently trips UBSAN. Install the fixed 1.2.14 over + # it (ZTS-only; parallel requires ZTS). Remove this step once the CI images + # are rebuilt with parallel >= 1.2.14. + - name: Install fixed parallel 1.2.14 (ZTS only, temporary until images rebuilt) + if: matrix.php-build == 'zts' + run: | + set -eux + switch-php zts + scan_dir="$(php -r 'echo PHP_CONFIG_FILE_SCAN_DIR;')" + # pecl refuses to reinstall while the extension is loaded, so move its + # ini aside during the build, then restore it so the test run loads the + # freshly installed parallel.so. Use the direct package URL because the + # channel REST cache in the image can lag behind new releases. + mv "$scan_dir/parallel.ini" /tmp/parallel.ini.disabled + yes '' | pecl install -f https://pecl.php.net/get/parallel-1.2.14.tgz + mv /tmp/parallel.ini.disabled "$scan_dir/parallel.ini" + php --ri parallel | grep -i version + - name: Run phpt tests run: | set -eux diff --git a/.gitlab/generate-profiler.php b/.gitlab/generate-profiler.php index 2268cd44c4..a1f6b4b256 100644 --- a/.gitlab/generate-profiler.php +++ b/.gitlab/generate-profiler.php @@ -13,6 +13,16 @@ } ?> +# PHP 8.5 has a known tailcall VM crash; re-enable once PHP 8.5.8 is available. +.php_language_profiler_targets: &php_language_profiler_targets + + "profiling tests": stage: test tags: [ "arch:${ARCH}" ] @@ -44,6 +54,8 @@ script: - if [ -d '/opt/rh/devtoolset-7' ]; then set +eo pipefail; source scl_source enable devtoolset-7; set -eo pipefail; fi - if [ -d '/opt/rh/devtoolset-7' ] && [ "$(uname -m)" = "aarch64" ]; then export BINDGEN_EXTRA_CLANG_ARGS="-I$(clang --print-resource-dir)/include"; fi + - if [ -f /sbin/apk ] && [ $(uname -m) = "aarch64" ]; then ln -sf ../lib/llvm17/bin/clang /usr/bin/clang; fi + - export DD_PROFILING_OUTPUT_PPROF=/tmp/ - cd profiling - 'echo "nproc: $(nproc)"' @@ -86,8 +98,13 @@ image: registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-${PHP_MAJOR_MINOR}_bookworm-8 variables: KUBERNETES_CPU_REQUEST: 5 + KUBERNETES_CPU_LIMIT: 5 KUBERNETES_MEMORY_REQUEST: 3Gi - KUBERNETES_MEMORY_LIMIT: 4Gi + KUBERNETES_MEMORY_LIMIT: 3Gi + KUBERNETES_HELPER_CPU_REQUEST: 1 + KUBERNETES_HELPER_CPU_LIMIT: 1 + KUBERNETES_HELPER_MEMORY_REQUEST: 2Gi + KUBERNETES_HELPER_MEMORY_LIMIT: 2Gi # CARGO_TARGET_DIR: /mnt/ramdisk/cargo # ramdisk?? libdir: /tmp/datadog-profiling parallel: @@ -105,11 +122,56 @@ image: registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-8.5_bookworm-8 variables: KUBERNETES_CPU_REQUEST: 5 + KUBERNETES_CPU_LIMIT: 5 KUBERNETES_MEMORY_REQUEST: 3Gi - KUBERNETES_MEMORY_LIMIT: 4Gi + KUBERNETES_MEMORY_LIMIT: 3Gi + KUBERNETES_HELPER_CPU_REQUEST: 1 + KUBERNETES_HELPER_CPU_LIMIT: 1 + KUBERNETES_HELPER_MEMORY_REQUEST: 2Gi + KUBERNETES_HELPER_MEMORY_LIMIT: 2Gi # CARGO_TARGET_DIR: /mnt/ramdisk/cargo # ramdisk?? libdir: /tmp/datadog-profiling script: - switch-php nts # not compatible with debug - cd profiling - cargo test --all-features + +"PHP language tests": + stage: test + tags: [ "arch:${ARCH}" ] + image: registry.ddbuild.io/images/mirror/datadog/dd-trace-ci:php-${PHP_MAJOR_MINOR}_bookworm-8 + variables: + KUBERNETES_CPU_REQUEST: 5 + KUBERNETES_CPU_LIMIT: 5 + KUBERNETES_MEMORY_REQUEST: 3Gi + KUBERNETES_MEMORY_LIMIT: 3Gi + KUBERNETES_HELPER_CPU_REQUEST: 1 + KUBERNETES_HELPER_CPU_LIMIT: 1 + KUBERNETES_HELPER_MEMORY_REQUEST: 2Gi + KUBERNETES_HELPER_MEMORY_LIMIT: 2Gi + CARGO_TARGET_DIR: /tmp/cargo + libdir: /tmp/datadog-profiling + SKIP_ONLINE_TESTS: "1" + REPORT_EXIT_STATUS: "1" + DD_PROFILING_OUTPUT_PPROF: /tmp/ + XFAIL_LIST: dockerfiles/ci/xfail_tests/${PHP_MAJOR_MINOR}.list + parallel: + matrix: + - PHP_MAJOR_MINOR: *php_language_profiler_targets + ARCH: amd64 + FLAVOUR: [nts, zts] + script: + - unset DD_SERVICE; unset DD_ENV + - command -v switch-php && switch-php "${FLAVOUR}" + - cd profiling + - cargo build --profile profiler-release + - cd .. + - echo "extension=/tmp/cargo/profiler-release/libdatadog_php_profiling.so" > /opt/php/${FLAVOUR}/conf.d/profiling.ini + - php -v + # Fail loudly if the profiler did not load: otherwise the language tests + # would run profiler-less and pass, giving a false green. + - php -m | grep -qx 'datadog-profiling' || { echo 'ERROR: datadog-profiling extension is not loaded'; exit 1; } + - cat "${XFAIL_LIST}" profiling/tests/php-language-xfail.list > /tmp/profiler-php-language-xfail.list + - if php -r 'exit(PHP_VERSION_ID < 80400 ? 0 : 1);'; then cat profiling/tests/php-language-xfail-pre84.list >> /tmp/profiler-php-language-xfail.list; fi + - export XFAIL_LIST=/tmp/profiler-php-language-xfail.list + - .gitlab/run_php_language_tests.sh diff --git a/.gitlab/run_php_language_tests.sh b/.gitlab/run_php_language_tests.sh index 1b6bcd71ba..7261c7d0c0 100755 --- a/.gitlab/run_php_language_tests.sh +++ b/.gitlab/run_php_language_tests.sh @@ -4,8 +4,18 @@ set -eo pipefail # Helper to parse version strings for comparison function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; } -sudo rm -f /opt/php/debug/conf.d/memcached.ini -sudo rm -f /opt/php/debug/conf.d/rdkafka.ini +# Remove extensions whose mere presence pollutes test output or output +# ordering: +# - rdkafka emits CONFWARN lines, breaking Zend/tests/instantiate_all_classes.phpt +# - parallel (ZTS-only) defers 'PHP Startup' diagnostics until after the +# script's first output, breaking ~56 deprecation/warning-ordering tests +# Derive the scan dir from the active PHP so this works for every build +# variant (debug for the tracer job, nts/zts for the profiler job) instead +# of hardcoding the debug path. +scan_dir="$(php -r 'echo PHP_CONFIG_FILE_SCAN_DIR;')" +sudo rm -f "${scan_dir}/memcached.ini" +sudo rm -f "${scan_dir}/rdkafka.ini" +sudo rm -f "${scan_dir}/parallel.ini" if [[ ! "${XFAIL_LIST:-none}" == "none" ]]; then cp "${XFAIL_LIST}" /usr/local/src/php/xfail_tests.list ( diff --git a/profiling/src/lib.rs b/profiling/src/lib.rs index 7adeae3ace..4afd0e8e81 100644 --- a/profiling/src/lib.rs +++ b/profiling/src/lib.rs @@ -247,8 +247,10 @@ extern "C" fn minit(_type: c_int, module_number: c_int) -> ZendResult { use std::sync::Mutex; let fd = loop { - // SAFETY: - let result = unsafe { libc::dup(libc::STDERR_FILENO) }; + // F_DUPFD_CLOEXEC (not plain dup) so the duplicate is not inherited + // by child processes spawned via proc_open()/exec(). See logging.rs. + // SAFETY: just a libc call. + let result = unsafe { libc::fcntl(libc::STDERR_FILENO, libc::F_DUPFD_CLOEXEC, 0) }; if result != -1 { break result; } else { diff --git a/profiling/src/logging.rs b/profiling/src/logging.rs index 3ea22c6e2e..1535ef02e2 100644 --- a/profiling/src/logging.rs +++ b/profiling/src/logging.rs @@ -12,7 +12,11 @@ pub fn log_init(level_filter: LevelFilter) { */ // Safety: this is safe, it's just "unsafe" because it's a call into C. - let fd = unsafe { libc::dup(libc::STDERR_FILENO) }; + // F_DUPFD_CLOEXEC (not plain dup) so the duplicate is not inherited by + // child processes spawned via proc_open()/exec(). A leaked stderr dup + // keeps pipes open and hangs. Observable in `run-tests.php` in PHP + // (e.g. ext/curl/tests/curl_setopt_ssl.phpt spawning `openssl s_server`). + let fd = unsafe { libc::fcntl(libc::STDERR_FILENO, libc::F_DUPFD_CLOEXEC, 0) }; if fd != -1 { // Safety: the fd is a valid and open file descriptor, and the File has sole ownership. let target = Box::new(unsafe { File::from_raw_fd(fd) }); diff --git a/profiling/src/profiling/thread_utils.rs b/profiling/src/profiling/thread_utils.rs index d52c62cec3..504a56eda8 100644 --- a/profiling/src/profiling/thread_utils.rs +++ b/profiling/src/profiling/thread_utils.rs @@ -20,25 +20,38 @@ where let result = std::thread::Builder::new() .name(name.to_string()) .spawn(move || { - /* Thread must not handle signals intended for PHP threads. - * See Zend/zend_signal.c for which signals it registers. + /* This helper thread has no valid PHP/TSRM context, so it must not + * run any PHP signal handler. The Zend Engine registers a fixed set + * of signals (see Zend/zend_signal.c), but a PHP script can install + * a handler for *any* signal via pcntl_signal() (e.g. SIGCHLD with + * pcntl_async_signals(true)). If such a signal is delivered to this + * thread, pcntl_signal_handler() dereferences PCNTL_G(spares) with + * no thread context and segfaults + * (see ext/pcntl/tests/waiting_on_sigchild_pcntl_wait.phpt). + * + * So block every signal here; async signals are then delivered to a + * PHP thread instead. The synchronous fault signals are left + * unblocked so a genuine fault on this thread is still reported + * (e.g. by the crashtracker) rather than masked. */ unsafe { let mut sigset_mem = MaybeUninit::uninit(); let sigset = sigset_mem.as_mut_ptr(); - libc::sigemptyset(sigset); - - const SIGNALS: [libc::c_int; 6] = [ - libc::SIGPROF, // todo: SIGALRM on __CYGWIN__/__PHASE__ - libc::SIGHUP, - libc::SIGINT, - libc::SIGTERM, - libc::SIGUSR1, - libc::SIGUSR2, + libc::sigfillset(sigset); + + // Hardware/synchronous fault signals: keep them deliverable to + // this thread. + const KEEP_UNBLOCKED: [libc::c_int; 6] = [ + libc::SIGSEGV, + libc::SIGBUS, + libc::SIGFPE, + libc::SIGILL, + libc::SIGABRT, + libc::SIGTRAP, ]; - for signal in SIGNALS { - libc::sigaddset(sigset, signal); + for signal in KEEP_UNBLOCKED { + libc::sigdelset(sigset, signal); } libc::pthread_sigmask(libc::SIG_BLOCK, sigset, std::ptr::null_mut()); } diff --git a/profiling/src/timeline.rs b/profiling/src/timeline.rs index 3b1719c3dd..46afc789e7 100644 --- a/profiling/src/timeline.rs +++ b/profiling/src/timeline.rs @@ -245,12 +245,23 @@ unsafe extern "C" fn ddog_php_prof_zend_error_observer( return; } + // The engine passes a NULL file for fatal errors with no active file + // location, e.g. an uncaught exception reported via zend_exception_error + // (`zend_error_va(..., file=NULL, ...)`). CStr::from_ptr(NULL) would + // strlen(NULL) and segfault, so guard against it. See + // Zend/tests/bug50005.phpt and Zend/tests/bug64821.3.phpt. #[cfg(zend_error_observer_80)] - let filename_str = unsafe { core::ffi::CStr::from_ptr(file) }; + let filename = if file.is_null() { + String::new() + } else { + unsafe { core::ffi::CStr::from_ptr(file) } + .to_string_lossy() + .into_owned() + }; #[cfg(not(zend_error_observer_80))] - let filename_str = unsafe { zai_str_from_zstr(file.as_mut()) }; - - let filename = filename_str.to_string_lossy().into_owned(); + let filename = unsafe { zai_str_from_zstr(file.as_mut()) } + .to_string_lossy() + .into_owned(); let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); if let Some(profiler) = Profiler::get() { diff --git a/profiling/tests/README.md b/profiling/tests/README.md new file mode 100644 index 0000000000..12a7f62f46 --- /dev/null +++ b/profiling/tests/README.md @@ -0,0 +1,43 @@ +# PHP language test xfail lists + +The profiler's "PHP language tests" CI job runs the upstream PHP test suite +with the profiling extension loaded, for every supported PHP version and for +both the `nts` and `zts` builds. These lists exclude tests that cannot pass in +that environment for reasons unrelated to profiler correctness. + +`.gitlab/run_php_language_tests.sh` **deletes** every `.phpt` named in +`XFAIL_LIST` before running, so listing a test means "do not run" it. + +| File | Applies to | +|------|------------| +| `php-language-xfail.list` | all profiler runs (`nts` + `zts`, all versions) | +| `php-language-xfail-pre84.list` | PHP < 8.4 (appended by the job via a version check) | + +## `php-language-xfail.list` (all versions) + +Fail with the profiler loaded regardless of version/flavour: + +- `ext/ffi/tests/list.phpt` — aborts (`free(): invalid size`); allocation + profiler conflicts with the test's FFI memory management. +- `Zend/tests/concat_003.phpt` — perf-sensitive (2 s budget); allocation + profiling overhead can exceed it on CI runners. +- `ext/session/tests/bug60634.phpt` (also under `user_session_module/` on some + versions; both paths are listed) — `die()` inside a session save handler. + Fails intermittently in the parallel run with "Cannot call session save + handler in a recursive manner". Not a profiler issue: it passes in isolation + with the profiler enabled; it's a concurrency/session-save-path collision in + the 64-worker run. Listed because it is flaky under parallelism. + +## `php-language-xfail-pre84.list` (PHP < 8.4) + +`php-language-xfail-pre84.list` contains opcache optimizer-output tests that +fail only with the profiler on PHP ≤ 8.3. On PHP < 8.4 the profiler overrides +`zend_execute_internal` (to handle VM interrupts while an internal function is +on the stack); on 8.4+ that hook is not installed (frameless calls), so these +pass. Internal calls therefore compile to `DO_FCALL` instead of `DO_ICALL`, +changing the optimized opcodes. + +- `opt/prop_types.phpt`, `opt/gh11170.phpt`, `opt/nullsafe_002.phpt` — cosmetic + opcode-dump differences (`DO_ICALL` → `DO_FCALL`). +- `bug66251.phpt` — same `< 8.4` condition: with the execute hook installed, + opcache folds a same-file runtime constant that should stay dynamic. diff --git a/profiling/tests/php-language-xfail-pre84.list b/profiling/tests/php-language-xfail-pre84.list new file mode 100644 index 0000000000..e46b04a589 --- /dev/null +++ b/profiling/tests/php-language-xfail-pre84.list @@ -0,0 +1,4 @@ +ext/opcache/tests/opt/prop_types.phpt +ext/opcache/tests/opt/gh11170.phpt +ext/opcache/tests/opt/nullsafe_002.phpt +ext/opcache/tests/bug66251.phpt diff --git a/profiling/tests/php-language-xfail.list b/profiling/tests/php-language-xfail.list new file mode 100644 index 0000000000..db2a6d4d6b --- /dev/null +++ b/profiling/tests/php-language-xfail.list @@ -0,0 +1,4 @@ +Zend/tests/concat_003.phpt +ext/ffi/tests/list.phpt +ext/session/tests/bug60634.phpt +ext/session/tests/user_session_module/bug60634.phpt diff --git a/profiling/tests/phpt/pcntl_async_signal_helper_thread.phpt b/profiling/tests/phpt/pcntl_async_signal_helper_thread.phpt new file mode 100644 index 0000000000..237da5a944 --- /dev/null +++ b/profiling/tests/phpt/pcntl_async_signal_helper_thread.phpt @@ -0,0 +1,77 @@ +--TEST-- +[profiling] async PHP signal handlers must not run on profiler helper threads +--DESCRIPTION-- +Regression test for a crash when a PHP script installs an async signal handler +(pcntl_async_signals(true) + pcntl_signal()) for a signal the Zend Engine does +not itself register, e.g. SIGCHLD. + +The profiler's helper threads (`ddprof_time`, `ddprof_upload`) only masked the +fixed set of signals the Zend Engine uses, so the kernel could deliver SIGCHLD +to one of them. pcntl's handler then ran on a thread with no valid PHP/TSRM +context and dereferenced the thread-local PCNTL_G, segfaulting. The fix is to +block every (non-fault) signal on the helper threads so async signals are +delivered to a PHP thread. + +Only crashes on ZTS, where PCNTL_G is thread-local. Observed on PHP 8.4 ZTS, +reproduced by ext/pcntl/tests/waiting_on_sigchild_pcntl_wait.phpt: + + Thread 2 "ddprof_time" received signal SIGSEGV, Segmentation fault. + #0 pcntl_signal_handler (signo=17, ...) at ext/pcntl/pcntl.c:1289 + 1289 struct php_pcntl_pending_signal *psig = PCNTL_G(spares); + #1 + #2 syscall () + #3 std::sys::pal::unix::futex::futex_wait () + #4 std::sys::sync::thread_parking::futex::Parker::park_timeout () + ... + #15 run () at profiling/src/profiling/uploader.rs:157 + #16 {closure#3} () at profiling/src/profiling/mod.rs:898 + #17 ... at profiling/src/profiling/thread_utils.rs:45 +--SKIPIF-- + +--ENV-- +DD_PROFILING_ENABLED=yes +DD_PROFILING_LOG_LEVEL=off +--FILE-- + 0) { + unset($processes[$pid]); + } +}); + +// Spawn bursts of short-lived children. They exit at roughly the same time, so +// a burst of SIGCHLD arrives while the main thread is idle in usleep(). If the +// profiler's helper threads do not block SIGCHLD the kernel may deliver it to +// one of them, where pcntl's handler runs without a PHP/TSRM context and +// crashes. Several rounds widen the (timing-dependent) race window. +for ($round = 0; $round < 4; $round++) { + for ($i = 0; $i < 8; $i++) { + $proc = proc_open('sleep 0.3', [], $pipes); + if ($proc !== false) { + $processes[proc_get_status($proc)['pid']] = $proc; + } + } + $iters = 50; + while (!empty($processes) && $iters-- > 0) { + usleep(100000); + } +} + +echo empty($processes) ? "OK\n" : "leftover children\n"; +?> +--EXPECT-- +OK diff --git a/profiling/tests/phpt/timeline_fatal_error_null_filename.phpt b/profiling/tests/phpt/timeline_fatal_error_null_filename.phpt new file mode 100644 index 0000000000..0aea9d2a78 --- /dev/null +++ b/profiling/tests/phpt/timeline_fatal_error_null_filename.phpt @@ -0,0 +1,46 @@ +--TEST-- +[profiling] fatal error with a NULL file location must not crash the timeline error observer +--DESCRIPTION-- +Regression test for a NULL pointer dereference in the timeline error observer. + +When an uncaught exception whose `file` property is NULL is reported, the +engine calls the error notification with file=NULL (location "Unknown"). The +profiler's timeline error observer used to call CStr::from_ptr(NULL) (PHP 8.0, +where the observer receives a raw C string), which segfaulted in strlen(). + +Reproduces the upstream Zend/tests/bug50005.phpt and bug64821.3.phpt crashes +that only triggered with the profiler loaded and timeline enabled. +--SKIPIF-- +file = null` +// throws a TypeError and can no longer produce a NULL-file fatal at all. +if (PHP_VERSION_ID < 80000 || PHP_VERSION_ID >= 80100) + echo "skip: NULL-file fatal error observer crash is specific to PHP 8.0\n"; +?> +--ENV-- +DD_PROFILING_ENABLED=yes +DD_PROFILING_TIMELINE_ENABLED=yes +DD_PROFILING_ALLOCATION_ENABLED=no +DD_PROFILING_EXCEPTION_ENABLED=no +DD_PROFILING_LOG_LEVEL=off +--FILE-- +file = null; + } +} + +throw new a; + +?> +--EXPECTF-- +Fatal error: Uncaught a in :%d +Stack trace: +#0 {main} + thrown in Unknown on line %d