From 9e850dc4fa7ce5deba9df28fb8477367bb3522d1 Mon Sep 17 00:00:00 2001 From: Jules Wiriath Date: Thu, 30 Apr 2026 17:49:02 +0200 Subject: [PATCH 1/4] feat: added regex-lite feature and changed code path to use libdd-common provided regex engine, which changes based on the feature chore: use shorthand and format fix: exclude datadog-ffe from this change. feat: remove env_filter and regex-automata docs: remove misleading doc --- Cargo.lock | 18 ++++++------ LICENSE-3rdparty.csv | 1 + bin_tests/Cargo.toml | 4 ++- bin_tests/tests/crashtracker_bin_test.rs | 2 +- builder/Cargo.toml | 3 +- builder/src/utils.rs | 4 +-- datadog-ffe/Cargo.toml | 3 +- datadog-ffe/src/rules_based/ufc/models.rs | 2 +- datadog-live-debugger-ffi/Cargo.toml | 1 + datadog-live-debugger/Cargo.toml | 5 ++-- datadog-live-debugger/src/expr_eval.rs | 2 +- datadog-live-debugger/src/redacted_names.rs | 6 ++-- datadog-remote-config/Cargo.toml | 2 +- .../src/config/agent_task.rs | 2 +- libdd-common-ffi/Cargo.toml | 1 + libdd-common/Cargo.toml | 3 ++ libdd-common/src/azure_app_services.rs | 2 +- .../src/entity_id/unix/container_id.rs | 2 +- libdd-common/src/entity_id/unix/mod.rs | 2 +- libdd-common/src/lib.rs | 1 + libdd-common/src/regex_engine.rs | 15 ++++++++++ libdd-crashtracker-ffi/Cargo.toml | 1 + libdd-data-pipeline-ffi/Cargo.toml | 1 + libdd-data-pipeline/Cargo.toml | 3 +- libdd-data-pipeline/src/telemetry/mod.rs | 3 ++ libdd-ddsketch-ffi/Cargo.toml | 1 + libdd-library-config-ffi/Cargo.toml | 1 + libdd-log-ffi/Cargo.toml | 1 + libdd-log/Cargo.toml | 2 +- libdd-log/src/logger.rs | 28 +++++++++++-------- libdd-profiling-ffi/Cargo.toml | 1 + libdd-shared-runtime-ffi/Cargo.toml | 1 + libdd-shared-runtime/Cargo.toml | 3 ++ libdd-telemetry-ffi/Cargo.toml | 1 + libdd-trace-obfuscation/Cargo.toml | 2 +- libdd-trace-obfuscation/src/ip_address.rs | 2 +- libdd-trace-obfuscation/src/replacer.rs | 19 +++++++------ tools/Cargo.toml | 5 +++- tools/src/lib.rs | 2 +- 39 files changed, 105 insertions(+), 53 
deletions(-) create mode 100644 libdd-common/src/regex_engine.rs diff --git a/Cargo.lock b/Cargo.lock index 2d319c5084..f3bd2da413 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -473,7 +473,6 @@ dependencies = [ "nix 0.29.0", "once_cell", "os_info", - "regex", "serde_json", "serial_test", "strum", @@ -710,8 +709,8 @@ dependencies = [ "anyhow", "build_common", "cmake", + "libdd-common", "pico-args", - "regex", "serde", "tar", "toml", @@ -1359,10 +1358,10 @@ dependencies = [ "derive_more", "env_logger", "faststr", + "libdd-common", "log", "md5", "pyo3", - "regex", "semver", "serde", "serde-bool", @@ -1438,8 +1437,6 @@ dependencies = [ "libdd-common", "libdd-data-pipeline", "percent-encoding", - "regex", - "regex-automata", "serde", "serde_json", "smallvec", @@ -1494,7 +1491,6 @@ dependencies = [ "libdd-common", "libdd-trace-protobuf", "manual_future", - "regex", "serde", "serde_json", "serde_with", @@ -2871,6 +2867,7 @@ dependencies = [ "pin-project", "rand 0.8.5", "regex", + "regex-lite", "reqwest", "rustls", "rustls-native-certs", @@ -3314,7 +3311,6 @@ dependencies = [ "libdd-trace-utils", "log", "percent-encoding", - "regex", "serde", "serde_json", ] @@ -4635,6 +4631,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" + [[package]] name = "regex-syntax" version = "0.8.5" @@ -5945,8 +5947,8 @@ dependencies = [ "cargo_metadata", "clap", "colored", + "libdd-common", "quick-xml", - "regex", "toml", "wait-timeout", ] diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 2a3a47edfa..035e4ac7d9 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -335,6 +335,7 @@ ref-cast,https://github.com/dtolnay/ref-cast,MIT OR Apache-2.0,David Tolnay regex,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " 
regex-automata,https://github.com/rust-lang/regex/tree/master/regex-automata,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " +regex-lite,https://github.com/rust-lang/regex,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " regex-syntax,https://github.com/rust-lang/regex/tree/master/regex-syntax,MIT OR Apache-2.0,"The Rust Project Developers, Andrew Gallant " reqwest,https://github.com/seanmonstar/reqwest,MIT OR Apache-2.0,Sean McArthur resolv-conf,https://github.com/hickory-dns/resolv-conf,MIT OR Apache-2.0,The resolv-conf Authors diff --git a/bin_tests/Cargo.toml b/bin_tests/Cargo.toml index 43e40161e4..65437705a7 100644 --- a/bin_tests/Cargo.toml +++ b/bin_tests/Cargo.toml @@ -25,11 +25,13 @@ errno = "0.3" nix = { version = "0.29", features = ["signal", "socket"] } hex = "0.4" os_info = "3.14.0" -regex = "1.0" [dev-dependencies] serial_test = "3.2" +[features] +regex-lite = ["libdd-common/regex-lite"] + [lib] bench = false diff --git a/bin_tests/tests/crashtracker_bin_test.rs b/bin_tests/tests/crashtracker_bin_test.rs index f6ed81a32d..503da63f14 100644 --- a/bin_tests/tests/crashtracker_bin_test.rs +++ b/bin_tests/tests/crashtracker_bin_test.rs @@ -738,7 +738,7 @@ fn test_panic_hook_mode(mode: &str, expected_category: &str, expected_panic_mess // Check for location format (file:line:column) - always present in Debug builds // Location should end with pattern like " (path/file.rs:123:45)" - let location_regex = regex::Regex::new(r" \(.+?:\d+:\d+\)$").unwrap(); + let location_regex = libdd_common::regex_engine::Regex::new(r" \(.+?:\d+:\d+\)$").unwrap(); assert!( location_regex.is_match(message), "Expected panic message to end with location ' (file:line:column)', got: {}", diff --git a/builder/Cargo.toml b/builder/Cargo.toml index bbb8c1694d..e8efb90671 100644 --- a/builder/Cargo.toml +++ b/builder/Cargo.toml @@ -29,6 +29,7 @@ library-config = [] log = [] ddsketch = [] ffe = [] +regex-lite = ["libdd-common/regex-lite"] [lib] bench = 
false @@ -44,7 +45,7 @@ tar = "0.4.45" tools = { path = "../tools" } toml = "0.8.19" serde = "1.0.209" -regex = "1.10" +libdd-common = { path = "../libdd-common", default-features = false } [[bin]] name = "release" diff --git a/builder/src/utils.rs b/builder/src/utils.rs index 804003a747..04f863acdf 100644 --- a/builder/src/utils.rs +++ b/builder/src/utils.rs @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use anyhow::{anyhow, Result}; -use regex::Regex; +use libdd_common::regex_engine::{Captures, Regex}; use std::fs::{self, OpenOptions}; use std::io::Write; use std::path::{Path, PathBuf}; @@ -37,7 +37,7 @@ pub(crate) fn adjust_extern_symbols( let re = Regex::new(r#"(?m)^(\s*)extern\s+(.+;)$"#).unwrap(); // Replace function using captures - let new_content = re.replace_all(&content, |caps: &regex::Captures| { + let new_content = re.replace_all(&content, |caps: &Captures| { let full_match = caps.get(0).unwrap().as_str(); let indent = &caps[1]; let declaration = &caps[2]; diff --git a/datadog-ffe/Cargo.toml b/datadog-ffe/Cargo.toml index cdcf67ec46..ecc1dff5a5 100644 --- a/datadog-ffe/Cargo.toml +++ b/datadog-ffe/Cargo.toml @@ -18,7 +18,7 @@ chrono = { version = "0.4.38", default-features = false, features = ["now", "ser derive_more = { version = "2.0.0", default-features = false, features = ["from", "into"] } log = { version = "0.4.21", default-features = false, features = ["kv", "kv_serde"] } md5 = { version = "0.7.0", default-features = false } -regex = "1.10.4" +libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false } semver = "1.0" serde-bool = { version = "0.1.3", default-features = false } serde_with = { version = "3.11.0", default-features = false, features = ["base64", "hex", "macros"] } @@ -37,3 +37,4 @@ path = "benches/eval.rs" [features] pyo3 = ["dep:pyo3"] +regex-lite = ["libdd-common/regex-lite"] diff --git a/datadog-ffe/src/rules_based/ufc/models.rs b/datadog-ffe/src/rules_based/ufc/models.rs index 
7a826c06d6..064d3dc837 100644 --- a/datadog-ffe/src/rules_based/ufc/models.rs +++ b/datadog-ffe/src/rules_based/ufc/models.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc}; -use regex::Regex; +use libdd_common::regex_engine::Regex; use serde::{Deserialize, Serialize}; use crate::rules_based::{EvaluationError, FlagType, Str, Timestamp}; diff --git a/datadog-live-debugger-ffi/Cargo.toml b/datadog-live-debugger-ffi/Cargo.toml index 466e87a0bd..596187b800 100644 --- a/datadog-live-debugger-ffi/Cargo.toml +++ b/datadog-live-debugger-ffi/Cargo.toml @@ -25,6 +25,7 @@ log = "0.4.21" [features] default = ["cbindgen"] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen"] +regex-lite = ["datadog-live-debugger/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/datadog-live-debugger/Cargo.toml b/datadog-live-debugger/Cargo.toml index 45137559fb..c305dc179d 100644 --- a/datadog-live-debugger/Cargo.toml +++ b/datadog-live-debugger/Cargo.toml @@ -13,16 +13,17 @@ http-body-util = "0.1" "http" = "1" bytes = "1.11.1" -regex = "1.9.3" percent-encoding = "2.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" sys-info = { version = "0.9.0" } uuid = { version = "1.0", features = ["v4"] } -regex-automata = "0.4.5" smallvec = "1.13.2" constcat = "0.4.1" tokio = "1.36.0" +[features] +regex-lite = ["libdd-common/regex-lite"] + [lib] bench = false diff --git a/datadog-live-debugger/src/expr_eval.rs b/datadog-live-debugger/src/expr_eval.rs index 9cd65b2fb7..1372fc5a0c 100644 --- a/datadog-live-debugger/src/expr_eval.rs +++ b/datadog-live-debugger/src/expr_eval.rs @@ -6,7 +6,7 @@ use crate::expr_defs::{ BinaryComparison, CollectionMatch, CollectionSource, Condition, DslPart, NumberSource, Reference, StringComparison, StringSource, Value, }; -use regex::Regex; +use libdd_common::regex_engine::Regex; use std::borrow::Cow; use std::cmp::min; use std::fmt::{Display, Formatter}; diff --git 
a/datadog-live-debugger/src/redacted_names.rs b/datadog-live-debugger/src/redacted_names.rs index 1c99e5fed3..ab53c53203 100644 --- a/datadog-live-debugger/src/redacted_names.rs +++ b/datadog-live-debugger/src/redacted_names.rs @@ -3,7 +3,7 @@ #![allow(invalid_reference_casting)] -use regex_automata::dfa::regex::Regex; +use libdd_common::regex_engine::Regex; use std::collections::HashSet; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::LazyLock; @@ -164,7 +164,7 @@ pub unsafe fn add_redacted_type>(name: I) { regex_str.push('|') } let name = String::from_utf8_lossy(name); - regex_str.push_str(regex::escape(&name[..name.len() - 1]).as_str()); + regex_str.push_str(libdd_common::regex_engine::escape(&name[..name.len() - 1]).as_str()); regex_str.push_str(".*"); } else { let added_types = &mut (*(&*ADDED_REDACTED_TYPES as *const Vec>).cast_mut()); @@ -211,7 +211,7 @@ pub fn is_redacted_type>(name: I) -> bool { if REDACTED_TYPES.contains(name) { true } else if !REDACTED_WILDCARD_TYPES_PATTERN.is_empty() { - REDACTED_TYPES_REGEX.is_match(name) + std::str::from_utf8(name).is_ok_and(|s| REDACTED_TYPES_REGEX.is_match(s)) } else { false } diff --git a/datadog-remote-config/Cargo.toml b/datadog-remote-config/Cargo.toml index 7279f74779..e4102603cb 100644 --- a/datadog-remote-config/Cargo.toml +++ b/datadog-remote-config/Cargo.toml @@ -22,6 +22,7 @@ client = [ "tracing" ] live-debugger = ["datadog-live-debugger"] +regex-lite = ["libdd-common/regex-lite"] ffe = ["datadog-ffe"] test = ["hyper/server", "hyper-util"] @@ -46,7 +47,6 @@ tracing = { version = "0.1", default-features = false, optional = true } serde = "1.0" serde_json = { version = "1.0", features = ["raw_value"] } serde_with = "3" -regex = "1.0" # Test feature hyper-util = { workspace = true, features = ["service"], optional = true } diff --git a/datadog-remote-config/src/config/agent_task.rs b/datadog-remote-config/src/config/agent_task.rs index 790ee567c5..d83df9d4f7 100644 --- 
a/datadog-remote-config/src/config/agent_task.rs +++ b/datadog-remote-config/src/config/agent_task.rs @@ -6,7 +6,7 @@ use serde::Deserialize; #[cfg(feature = "test")] use serde::Serialize; -use regex::Regex; +use libdd_common::regex_engine::Regex; use serde::de::{self, Deserializer}; fn deserialize_case_id<'de, D>(deserializer: D) -> Result diff --git a/libdd-common-ffi/Cargo.toml b/libdd-common-ffi/Cargo.toml index 0f1443f42d..b7900a339d 100644 --- a/libdd-common-ffi/Cargo.toml +++ b/libdd-common-ffi/Cargo.toml @@ -15,6 +15,7 @@ bench =false [features] default = ["cbindgen"] cbindgen = ["build_common/cbindgen"] +regex-lite = ["libdd-common/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-common/Cargo.toml b/libdd-common/Cargo.toml index f4a978f24f..616127c465 100644 --- a/libdd-common/Cargo.toml +++ b/libdd-common/Cargo.toml @@ -29,6 +29,7 @@ bytes = { version = "1.11.1" } pin-project = "1" rand = { version = "0.8", optional = true } regex = "1.5" +regex-lite = { version = "0.1", optional = true } # Use hickory-dns instead of the default system DNS resolver to avoid fork safety issues. # The default resolver can hold locks or other global state that can cause deadlocks # or corruption when the process forks (e.g., in PHP-FPM or other forking environments). 
@@ -98,6 +99,8 @@ use_webpki_roots = ["hyper-rustls/webpki-roots"] # Enable this feature to enable stubbing of cgroup # php directly import this crate and uses functions gated by this feature for their test cgroup_testing = [] +# Use regex-lite instead of regex for smaller binary size +regex-lite = ["dep:regex-lite"] # FIPS mode uses the FIPS-compliant cryptographic provider (Unix only) fips = ["tls-core", "hyper-rustls/fips"] # Enable reqwest client builder support with file dump debugging diff --git a/libdd-common/src/azure_app_services.rs b/libdd-common/src/azure_app_services.rs index 6fbd9ee180..e2b4d83bad 100644 --- a/libdd-common/src/azure_app_services.rs +++ b/libdd-common/src/azure_app_services.rs @@ -1,7 +1,7 @@ // Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use regex::Regex; +use crate::regex_engine::Regex; #[cfg(target_os = "linux")] use std::collections::HashMap; use std::sync::LazyLock; diff --git a/libdd-common/src/entity_id/unix/container_id.rs b/libdd-common/src/entity_id/unix/container_id.rs index cf0fee1807..dc9dec85b2 100644 --- a/libdd-common/src/entity_id/unix/container_id.rs +++ b/libdd-common/src/entity_id/unix/container_id.rs @@ -3,7 +3,7 @@ //! 
This module provides functions to parse the container id from the cgroup file use super::CgroupFileParsingError; -use regex::Regex; +use crate::regex_engine::Regex; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::Path; diff --git a/libdd-common/src/entity_id/unix/mod.rs b/libdd-common/src/entity_id/unix/mod.rs index 2d77377c93..826f2e9604 100644 --- a/libdd-common/src/entity_id/unix/mod.rs +++ b/libdd-common/src/entity_id/unix/mod.rs @@ -104,7 +104,7 @@ pub static ENTITY_ID: LazyLock> = LazyLock::new(|| { #[cfg(test)] mod tests { use super::*; - use regex::Regex; + use crate::regex_engine::Regex; static IN_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"in-\d+").unwrap()); static CI_REGEX: LazyLock = LazyLock::new(|| { diff --git a/libdd-common/src/lib.rs b/libdd-common/src/lib.rs index 2578459260..7e1e453014 100644 --- a/libdd-common/src/lib.rs +++ b/libdd-common/src/lib.rs @@ -21,6 +21,7 @@ pub mod connector; #[cfg(feature = "reqwest")] pub mod dump_server; pub mod entity_id; +pub mod regex_engine; #[macro_use] pub mod cstr; #[cfg(feature = "bench-utils")] diff --git a/libdd-common/src/regex_engine.rs b/libdd-common/src/regex_engine.rs new file mode 100644 index 0000000000..fdb57e9296 --- /dev/null +++ b/libdd-common/src/regex_engine.rs @@ -0,0 +1,15 @@ +// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Workspace-wide regex engine re-exports. +//! +//! By default this module re-exports from the full [`regex`] crate. +//! Enable the **`regex-lite`** feature to switch to [`regex_lite`] instead, +//! which trades advanced features (Unicode classes, look-around, etc.) for +//! smaller binary size and faster compile times. 
+ +#[cfg(feature = "regex-lite")] +pub use regex_lite::{escape, Captures, Regex, RegexBuilder, Replacer}; + +#[cfg(not(feature = "regex-lite"))] +pub use regex::{escape, Captures, Regex, RegexBuilder, Replacer}; diff --git a/libdd-crashtracker-ffi/Cargo.toml b/libdd-crashtracker-ffi/Cargo.toml index 2af19d68b1..47d07a28e1 100644 --- a/libdd-crashtracker-ffi/Cargo.toml +++ b/libdd-crashtracker-ffi/Cargo.toml @@ -22,6 +22,7 @@ required-features = ["collector_windows"] [features] default = ["cbindgen", "collector", "demangler", "receiver"] cbindgen = ["build_common/cbindgen"] +regex-lite = ["libdd-common-ffi/regex-lite"] # Enables the in-process collection of crash-info collector = [] collector_windows = [] diff --git a/libdd-data-pipeline-ffi/Cargo.toml b/libdd-data-pipeline-ffi/Cargo.toml index aea4d46937..85d3323a34 100644 --- a/libdd-data-pipeline-ffi/Cargo.toml +++ b/libdd-data-pipeline-ffi/Cargo.toml @@ -19,6 +19,7 @@ bench = false default = ["cbindgen", "catch_panic"] catch_panic = [] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen"] +regex-lite = ["libdd-data-pipeline/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-data-pipeline/Cargo.toml b/libdd-data-pipeline/Cargo.toml index 5a2b62ea4f..ddef1e5a78 100644 --- a/libdd-data-pipeline/Cargo.toml +++ b/libdd-data-pipeline/Cargo.toml @@ -64,6 +64,7 @@ path = "benches/trace_buffer.rs" libdd-capabilities-impl = { version = "1.0.0", path = "../libdd-capabilities-impl" } libdd-log = { path = "../libdd-log" } libdd-shared-runtime = { version = "0.1.0", path = "../libdd-shared-runtime" } +regex = "1.5" clap = { version = "4.0", features = ["derive"] } criterion = "0.5.1" libdd-trace-utils = { path = "../libdd-trace-utils", features = [ @@ -71,7 +72,6 @@ libdd-trace-utils = { path = "../libdd-trace-utils", features = [ ] } httpmock = "0.8.0-alpha.1" rand = "0.8.5" -regex = "1.5" tempfile = "3.3.0" tokio = { version = "1.23", features = [ "rt", @@ -90,3 
+90,4 @@ https = [ "libdd-dogstatsd-client/https", ] test-utils = [] +regex-lite = ["libdd-common/regex-lite"] diff --git a/libdd-data-pipeline/src/telemetry/mod.rs b/libdd-data-pipeline/src/telemetry/mod.rs index 97bdba6407..1676665614 100644 --- a/libdd-data-pipeline/src/telemetry/mod.rs +++ b/libdd-data-pipeline/src/telemetry/mod.rs @@ -330,6 +330,9 @@ mod tests { use libdd_capabilities::HttpError; use libdd_shared_runtime::{SharedRuntime, WorkerHandle}; use libdd_trace_utils::test_utils::poll_for_mock_hits; + // Use `regex::Regex` directly here because `httpmock`'s `body_matches` + // requires `Into`, which is only implemented for + // `regex::Regex`, not `regex_lite::Regex`. use regex::Regex; use tokio::time::sleep; diff --git a/libdd-ddsketch-ffi/Cargo.toml b/libdd-ddsketch-ffi/Cargo.toml index 85b09772d2..d778c1c896 100644 --- a/libdd-ddsketch-ffi/Cargo.toml +++ b/libdd-ddsketch-ffi/Cargo.toml @@ -16,6 +16,7 @@ bench = false [features] default = ["cbindgen"] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen"] +regex-lite = ["libdd-common-ffi/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-library-config-ffi/Cargo.toml b/libdd-library-config-ffi/Cargo.toml index 25801392b5..a8b7bd26e0 100644 --- a/libdd-library-config-ffi/Cargo.toml +++ b/libdd-library-config-ffi/Cargo.toml @@ -22,6 +22,7 @@ constcat = "0.4.1" default = ["cbindgen", "catch_panic"] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen"] catch_panic = [] +regex-lite = ["libdd-common-ffi/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-log-ffi/Cargo.toml b/libdd-log-ffi/Cargo.toml index f7fad1d5d3..a4d1597322 100644 --- a/libdd-log-ffi/Cargo.toml +++ b/libdd-log-ffi/Cargo.toml @@ -14,6 +14,7 @@ bench = false default = ["cbindgen", "expanded_builder_macros"] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen", "expanded_builder_macros"] expanded_builder_macros = [] 
+regex-lite = ["libdd-common-ffi/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-log/Cargo.toml b/libdd-log/Cargo.toml index b858be72bb..2f3220ce0c 100644 --- a/libdd-log/Cargo.toml +++ b/libdd-log/Cargo.toml @@ -13,7 +13,7 @@ bench = false [dependencies] tracing = { version = "0.1", default-features = false, features = ["std"] } -tracing-subscriber = { version = "0.3.22", default-features = false, features = ["json", "env-filter"] } +tracing-subscriber = { version = "0.3.22", default-features = false, features = ["json"] } tracing-appender = "0.2.3" chrono = { version = "0.4.38", default-features = false, features = ["clock", "std"] } diff --git a/libdd-log/src/logger.rs b/libdd-log/src/logger.rs index 2a893cdc16..d2c18531fb 100644 --- a/libdd-log/src/logger.rs +++ b/libdd-log/src/logger.rs @@ -4,10 +4,10 @@ use crate::writers::{FileWriter, StdWriter}; use std::sync::{LazyLock, Mutex}; use tracing::subscriber::DefaultGuard; -use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::filter::{LevelFilter, Targets}; use tracing_subscriber::layer::{Layered, SubscriberExt}; use tracing_subscriber::reload::Handle; -use tracing_subscriber::{fmt, reload, EnvFilter, Layer, Registry}; +use tracing_subscriber::{fmt, reload, Layer, Registry}; pub type Error = String; @@ -73,11 +73,11 @@ struct Logger { /// complexity warning. #[allow(clippy::type_complexity)] layer_handle: Handle< - Vec, Registry>> + Send + Sync>>, - Layered, Registry>, + Vec, Registry>> + Send + Sync>>, + Layered, Registry>, >, /// Handle for modifying the log filter at runtime. - filter_handle: Handle, + filter_handle: Handle, /// Guard is for local subscriber which is not used in the global logger. #[allow(dead_code)] _guard: Option, @@ -180,8 +180,10 @@ impl Logger { /// Set the log level for the logger. 
fn set_log_level(&self, log_level: LogEventLevel) -> Result<(), Error> { let level_filter = LevelFilter::from(log_level); - let new_filter = EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new(level_filter.to_string().to_lowercase())); + let new_filter = std::env::var("RUST_LOG") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or_else(|| Targets::new().with_default(level_filter)); self.filter_handle .modify(|filter| { @@ -194,9 +196,11 @@ impl Logger { } /// Create environment filter with default to INFO level. -fn env_filter() -> EnvFilter { - EnvFilter::try_from_default_env() - .unwrap_or_else(|_| EnvFilter::new(LevelFilter::INFO.to_string().to_lowercase())) +fn env_filter() -> Targets { + std::env::var("RUST_LOG") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or_else(|| Targets::new().with_default(LevelFilter::INFO)) } /// Create standard output layer. @@ -204,7 +208,7 @@ fn env_filter() -> EnvFilter { fn std_layer( config: &StdConfig, ) -> Result< - Box, Registry>> + Send + Sync + 'static>, + Box, Registry>> + Send + Sync + 'static>, Error, > { let writer = StdWriter::new(config.target); @@ -224,7 +228,7 @@ fn std_layer( fn file_layer( config: &FileConfig, ) -> Result< - Box, Registry>> + Send + Sync + 'static>, + Box, Registry>> + Send + Sync + 'static>, Error, > { let writer = FileWriter::new(config) diff --git a/libdd-profiling-ffi/Cargo.toml b/libdd-profiling-ffi/Cargo.toml index cfa1031ebe..bc2dccf5f1 100644 --- a/libdd-profiling-ffi/Cargo.toml +++ b/libdd-profiling-ffi/Cargo.toml @@ -33,6 +33,7 @@ datadog-library-config-ffi = ["dep:libdd-library-config-ffi"] ddcommon-ffi = ["dep:libdd-common-ffi"] ddsketch-ffi = ["dep:libdd-ddsketch-ffi"] datadog-ffe-ffi = ["dep:datadog-ffe-ffi"] +regex-lite = ["libdd-common/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-shared-runtime-ffi/Cargo.toml b/libdd-shared-runtime-ffi/Cargo.toml index 88234c507c..41de29bd73 100644 --- 
a/libdd-shared-runtime-ffi/Cargo.toml +++ b/libdd-shared-runtime-ffi/Cargo.toml @@ -17,6 +17,7 @@ bench = false default = ["cbindgen", "catch_panic"] catch_panic = [] cbindgen = ["build_common/cbindgen"] +regex-lite = ["libdd-shared-runtime/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-shared-runtime/Cargo.toml b/libdd-shared-runtime/Cargo.toml index 7c25e8dadd..390958c9e7 100644 --- a/libdd-shared-runtime/Cargo.toml +++ b/libdd-shared-runtime/Cargo.toml @@ -24,5 +24,8 @@ tracing = { version = "0.1", default-features = false } libdd-capabilities = { path = "../libdd-capabilities", version = "1.0.0" } libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false } +[features] +regex-lite = ["libdd-common/regex-lite"] + [target.'cfg(not(target_arch = "wasm32"))'.dependencies] tokio = { version = "1.23", features = ["rt-multi-thread"] } diff --git a/libdd-telemetry-ffi/Cargo.toml b/libdd-telemetry-ffi/Cargo.toml index 055ff1db5d..5824ed71d5 100644 --- a/libdd-telemetry-ffi/Cargo.toml +++ b/libdd-telemetry-ffi/Cargo.toml @@ -17,6 +17,7 @@ bench = false default = ["cbindgen", "expanded_builder_macros"] cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen", "expanded_builder_macros"] expanded_builder_macros = [] +regex-lite = ["libdd-common-ffi/regex-lite"] [build-dependencies] build_common = { path = "../build-common" } diff --git a/libdd-trace-obfuscation/Cargo.toml b/libdd-trace-obfuscation/Cargo.toml index d6f4a59c1c..0f61579869 100644 --- a/libdd-trace-obfuscation/Cargo.toml +++ b/libdd-trace-obfuscation/Cargo.toml @@ -11,7 +11,6 @@ authors.workspace = true [dependencies] anyhow = "1.0" -regex = "1" serde = { version = "1.0.145", features = ["derive"] } serde_json = { version = "1.0", features = ["preserve_order"] } percent-encoding = "2.1" @@ -25,6 +24,7 @@ libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = default = ["https"] https = 
["libdd-common/https", "libdd-trace-utils/https"] fips = ["libdd-common/fips", "libdd-trace-utils/fips"] +regex-lite = ["libdd-common/regex-lite"] [dev-dependencies] duplicate = "0.4.1" diff --git a/libdd-trace-obfuscation/src/ip_address.rs b/libdd-trace-obfuscation/src/ip_address.rs index 4072ba8088..a204b52ce9 100644 --- a/libdd-trace-obfuscation/src/ip_address.rs +++ b/libdd-trace-obfuscation/src/ip_address.rs @@ -1,7 +1,7 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use regex::Regex; +use libdd_common::regex_engine::Regex; use std::{borrow::Cow, collections::HashSet, net::Ipv6Addr, sync::LazyLock}; const ALLOWED_IP_ADDRESSES: [&str; 5] = [ diff --git a/libdd-trace-obfuscation/src/replacer.rs b/libdd-trace-obfuscation/src/replacer.rs index c00e64e485..dd988a841c 100644 --- a/libdd-trace-obfuscation/src/replacer.rs +++ b/libdd-trace-obfuscation/src/replacer.rs @@ -1,8 +1,8 @@ // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +use libdd_common::regex_engine::{Regex, Replacer}; use libdd_trace_protobuf::pb; -use regex::Regex; use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize}; #[derive(Deserialize)] @@ -27,7 +27,7 @@ pub struct ReplaceRule { pub name: String, // re holds the regex pattern for matching. - pub re: regex::Regex, + pub re: Regex, // repl specifies the replacement string to be used when Pattern matches. 
pub repl: String, @@ -40,7 +40,7 @@ impl<'de> Deserialize<'de> for ReplaceRule { fn deserialize>(deserializer: D) -> Result { let raw = RawReplaceRule::deserialize(deserializer)?; let re = Regex::new(&raw.pattern).map_err(serde::de::Error::custom)?; - let no_expansion = regex::Replacer::no_expansion(&mut raw.repl.as_str()).is_some(); + let no_expansion = Replacer::no_expansion(&mut raw.repl.as_str()).is_some(); Ok(ReplaceRule { name: raw.name, re, @@ -125,7 +125,7 @@ pub fn parse_rules_from_string( anyhow::bail!("Obfuscator Error: Error while parsing rule: {}", err) } }; - let no_expansion = regex::Replacer::no_expansion(&mut &raw_rule.repl).is_some(); + let no_expansion = Replacer::no_expansion(&mut &raw_rule.repl).is_some(); vec.push(ReplaceRule { name: raw_rule.name, re: compiled_regex, @@ -183,7 +183,7 @@ fn replace_all( #[allow(clippy::unwrap_used)] let m = cap.get(0).unwrap(); scratch_space.push_str(&haystack[last_match..m.start()]); - regex::Replacer::replace_append(&mut replace, &cap, scratch_space); + Replacer::replace_append(&mut replace, &cap, scratch_space); last_match = m.end(); } scratch_space.push_str(&haystack[last_match..]); @@ -195,6 +195,7 @@ fn replace_all( #[cfg(test)] mod tests { + use super::Regex; use crate::replacer; use duplicate::duplicate_item; use libdd_trace_protobuf::pb; @@ -315,13 +316,13 @@ mod tests { fn test_replace_rule_eq() { let rule1 = replacer::ReplaceRule { name: "http.url".to_string(), - re: regex::Regex::new("(token/)([^/]*)").unwrap(), + re: Regex::new("(token/)([^/]*)").unwrap(), repl: "${1}?".to_string(), no_expansion: false, }; let rule2 = replacer::ReplaceRule { name: "http.url".to_string(), - re: regex::Regex::new("(token/)([^/]*)").unwrap(), + re: Regex::new("(token/)([^/]*)").unwrap(), repl: "${1}?".to_string(), no_expansion: false, }; @@ -333,13 +334,13 @@ mod tests { fn test_replace_rule_neq() { let rule1 = replacer::ReplaceRule { name: "http.url".to_string(), - re: regex::Regex::new("(token/)([^/]*)").unwrap(), 
+ re: Regex::new("(token/)([^/]*)").unwrap(), repl: "${1}?".to_string(), no_expansion: false, }; let rule2 = replacer::ReplaceRule { name: "http.url".to_string(), - re: regex::Regex::new("(broken/)([^/]*)").unwrap(), + re: Regex::new("(broken/)([^/]*)").unwrap(), repl: "${1}?".to_string(), no_expansion: false, }; diff --git a/tools/Cargo.toml b/tools/Cargo.toml index 2b6bddd154..ed731779f0 100644 --- a/tools/Cargo.toml +++ b/tools/Cargo.toml @@ -20,10 +20,13 @@ cargo_metadata = "0.18" clap = { version = "4.0", features = ["derive"] } colored = "2" quick-xml = "0.37" -regex = "1" +libdd-common = { path = "../libdd-common", default-features = false } toml = "0.8" wait-timeout = "0.2" +[features] +regex-lite = ["libdd-common/regex-lite"] + [[bin]] name = "dedup_headers" bench = false diff --git a/tools/src/lib.rs b/tools/src/lib.rs index 60c3d0ae04..e07b143cf4 100644 --- a/tools/src/lib.rs +++ b/tools/src/lib.rs @@ -4,7 +4,7 @@ pub mod junit_file_attributes; pub mod headers { - use regex::{Regex, RegexBuilder}; + use libdd_common::regex_engine::{Regex, RegexBuilder}; use std::collections::HashSet; use std::fs::{File, OpenOptions}; use std::io::{self, BufReader, BufWriter, Read, Seek, Write}; From dce8d30f37114f449b51471e5573da91b55e83ae Mon Sep 17 00:00:00 2001 From: Jules Wiriath Date: Tue, 5 May 2026 11:13:12 +0200 Subject: [PATCH 2/4] feat(regex_engine): ffe needs the full regex crate --- datadog-ffe/Cargo.toml | 2 +- libdd-common/Cargo.toml | 3 +++ libdd-common/src/regex_engine.rs | 8 ++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/datadog-ffe/Cargo.toml b/datadog-ffe/Cargo.toml index ecc1dff5a5..ad2e626e77 100644 --- a/datadog-ffe/Cargo.toml +++ b/datadog-ffe/Cargo.toml @@ -18,7 +18,7 @@ chrono = { version = "0.4.38", default-features = false, features = ["now", "ser derive_more = { version = "2.0.0", default-features = false, features = ["from", "into"] } log = { version = "0.4.21", default-features = false, features = ["kv", "kv_serde"] 
} md5 = { version = "0.7.0", default-features = false } -libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false } +libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false, features = ["ffe"] } semver = "1.0" serde-bool = { version = "0.1.3", default-features = false } serde_with = { version = "3.11.0", default-features = false, features = ["base64", "hex", "macros"] } diff --git a/libdd-common/Cargo.toml b/libdd-common/Cargo.toml index 616127c465..7105a4959e 100644 --- a/libdd-common/Cargo.toml +++ b/libdd-common/Cargo.toml @@ -101,6 +101,9 @@ use_webpki_roots = ["hyper-rustls/webpki-roots"] cgroup_testing = [] # Use regex-lite instead of regex for smaller binary size regex-lite = ["dep:regex-lite"] +# FFE uses user-provided regexes and cannot forgo the Unicode character class +# support dropped by regex-lite +ffe = [] # FIPS mode uses the FIPS-compliant cryptographic provider (Unix only) fips = ["tls-core", "hyper-rustls/fips"] # Enable reqwest client builder support with file dump debugging diff --git a/libdd-common/src/regex_engine.rs b/libdd-common/src/regex_engine.rs index fdb57e9296..95af92469a 100644 --- a/libdd-common/src/regex_engine.rs +++ b/libdd-common/src/regex_engine.rs @@ -7,9 +7,13 @@ //! Enable the **`regex-lite`** feature to switch to [`regex_lite`] instead, //! which trades advanced features (Unicode classes, look-around, etc.) for //! smaller binary size and faster compile times. +//! +//! The **`ffe`** feature forces the full `regex` crate even when `regex-lite` +//! is enabled, because FFE evaluates user-provided regexes that require +//! Unicode character class support. 
-#[cfg(feature = "regex-lite")] +#[cfg(all(feature = "regex-lite", not(feature = "ffe")))] pub use regex_lite::{escape, Captures, Regex, RegexBuilder, Replacer}; -#[cfg(not(feature = "regex-lite"))] +#[cfg(not(all(feature = "regex-lite", not(feature = "ffe"))))] pub use regex::{escape, Captures, Regex, RegexBuilder, Replacer}; From af89b4b921f19bbe1d254275ddadd3a84e16fb77 Mon Sep 17 00:00:00 2001 From: Jules Wiriath Date: Tue, 5 May 2026 15:13:57 +0200 Subject: [PATCH 3/4] fixup! feat(regex_engine): ffe needs the full regex crate --- datadog-ffe/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/datadog-ffe/Cargo.toml b/datadog-ffe/Cargo.toml index ad2e626e77..619c25c1ee 100644 --- a/datadog-ffe/Cargo.toml +++ b/datadog-ffe/Cargo.toml @@ -37,4 +37,3 @@ path = "benches/eval.rs" [features] pyo3 = ["dep:pyo3"] -regex-lite = ["libdd-common/regex-lite"] From 0b6f61cc31ebff29befaf0fcd98c363fb732fac6 Mon Sep 17 00:00:00 2001 From: Jules Wiriath Date: Tue, 5 May 2026 16:03:47 +0200 Subject: [PATCH 4/4] fmt(regex_engine): change name of feature that forces full regex crate --- datadog-ffe/Cargo.toml | 2 +- libdd-common/Cargo.toml | 6 +++--- libdd-common/src/regex_engine.rs | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/datadog-ffe/Cargo.toml b/datadog-ffe/Cargo.toml index 619c25c1ee..30dcad9231 100644 --- a/datadog-ffe/Cargo.toml +++ b/datadog-ffe/Cargo.toml @@ -18,7 +18,7 @@ chrono = { version = "0.4.38", default-features = false, features = ["now", "ser derive_more = { version = "2.0.0", default-features = false, features = ["from", "into"] } log = { version = "0.4.21", default-features = false, features = ["kv", "kv_serde"] } md5 = { version = "0.7.0", default-features = false } -libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false, features = ["ffe"] } +libdd-common = { version = "4.0.0", path = "../libdd-common", default-features = false, features = ["require-regex-full"] } semver = "1.0" 
serde-bool = { version = "0.1.3", default-features = false } serde_with = { version = "3.11.0", default-features = false, features = ["base64", "hex", "macros"] } diff --git a/libdd-common/Cargo.toml b/libdd-common/Cargo.toml index 7105a4959e..627dec81d5 100644 --- a/libdd-common/Cargo.toml +++ b/libdd-common/Cargo.toml @@ -101,9 +101,9 @@ use_webpki_roots = ["hyper-rustls/webpki-roots"] cgroup_testing = [] # Use regex-lite instead of regex for smaller binary size regex-lite = ["dep:regex-lite"] -# FFE uses user-provided regexes and cannot forgo the Unicode character class -# support dropped by regex-lite -ffe = [] +# Consumers that handle user-provided regexes can enable this feature to force +# the full `regex` crate, overriding `regex-lite` when both are active. +require-regex-full = [] # FIPS mode uses the FIPS-compliant cryptographic provider (Unix only) fips = ["tls-core", "hyper-rustls/fips"] # Enable reqwest client builder support with file dump debugging diff --git a/libdd-common/src/regex_engine.rs b/libdd-common/src/regex_engine.rs index 95af92469a..f3674f6e12 100644 --- a/libdd-common/src/regex_engine.rs +++ b/libdd-common/src/regex_engine.rs @@ -8,12 +8,12 @@ //! which trades advanced features (Unicode classes, look-around, etc.) for //! smaller binary size and faster compile times. //! -//! The **`ffe`** feature forces the full `regex` crate even when `regex-lite` -//! is enabled, because FFE evaluates user-provided regexes that require -//! Unicode character class support. +//! The **`require-regex-full`** feature forces the full `regex` crate even +//! when `regex-lite` is enabled, for consumers that evaluate user-provided +//! regexes requiring Unicode character class support. 
-#[cfg(all(feature = "regex-lite", not(feature = "ffe")))] +#[cfg(all(feature = "regex-lite", not(feature = "require-regex-full")))] pub use regex_lite::{escape, Captures, Regex, RegexBuilder, Replacer}; -#[cfg(not(all(feature = "regex-lite", not(feature = "ffe"))))] +#[cfg(not(all(feature = "regex-lite", not(feature = "require-regex-full"))))] pub use regex::{escape, Captures, Regex, RegexBuilder, Replacer};