From a70b7a5445e8f5f77694033fff74883acfbd4417 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 14:01:32 +0200 Subject: [PATCH 01/15] feat(size-benchmark): add syn-based FFI coverage binary generator Adds a new `size-benchmark` crate with a build.rs that: - Scans all *-ffi/src/**/*.rs files using syn - Finds all #[no_mangle] pub extern "C" fn signatures - Generates $OUT_DIR/calls.rs with exercise_all() For accessible public-module functions: called via qualified Rust path with zero/null args. For private-module functions: referenced via address-of extern "C" stubs to force linker inclusion. Adds ddog_ddsketch_dummy_size_bench() to libdd-ddsketch-ffi for verification. Release binary: ~10MB (LTO + opt-level=s). Coverage: 20 functions called (with args), 249 referenced (address-only) --- Cargo.lock | 20 ++ Cargo.toml | 1 + libdd-ddsketch-ffi/src/lib.rs | 9 + size-benchmark/Cargo.toml | 32 +++ size-benchmark/build.rs | 366 ++++++++++++++++++++++++++++++++++ size-benchmark/src/main.rs | 19 ++ 6 files changed, 447 insertions(+) create mode 100644 size-benchmark/Cargo.toml create mode 100644 size-benchmark/build.rs create mode 100644 size-benchmark/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 1a4c45ea9a..67519e990f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5281,6 +5281,26 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" +[[package]] +name = "size-benchmark" +version = "32.0.0" +dependencies = [ + "datadog-ffe-ffi", + "glob", + "libdd-common-ffi", + "libdd-crashtracker-ffi", + "libdd-data-pipeline-ffi", + "libdd-ddsketch-ffi", + "libdd-library-config-ffi", + "libdd-log-ffi", + "libdd-profiling-ffi", + "libdd-shared-runtime-ffi", + "libdd-telemetry-ffi", + "quote", + "symbolizer-ffi", + "syn 2.0.87", +] + [[package]] name = "slab" version = "0.4.9" diff --git a/Cargo.toml b/Cargo.toml index 1d75806752..af1a0636e6 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,7 @@ members = [ "libdd-http-client", "libdd-log", "libdd-log-ffi", + "size-benchmark", ] # https://doc.rust-lang.org/cargo/reference/resolver.html diff --git a/libdd-ddsketch-ffi/src/lib.rs b/libdd-ddsketch-ffi/src/lib.rs index cc306c9b8a..3ae4ad4ca1 100644 --- a/libdd-ddsketch-ffi/src/lib.rs +++ b/libdd-ddsketch-ffi/src/lib.rs @@ -130,6 +130,15 @@ pub extern "C" fn ddog_Vec_U8_drop(_vec: ffi::Vec) { // The Vec will be automatically dropped when it goes out of scope } +/// Dummy function for size-benchmark verification. +/// +/// # Safety +/// Always safe to call. +#[no_mangle] +pub unsafe extern "C" fn ddog_ddsketch_dummy_size_bench() -> u64 { + 42 +} + #[cfg(test)] mod tests { use super::*; diff --git a/size-benchmark/Cargo.toml b/size-benchmark/Cargo.toml new file mode 100644 index 0000000000..0f2f376584 --- /dev/null +++ b/size-benchmark/Cargo.toml @@ -0,0 +1,32 @@ +# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "size-benchmark" +edition.workspace = true +version.workspace = true +rust-version.workspace = true +license.workspace = true +publish = false + +[[bin]] +name = "size-benchmark" +path = "src/main.rs" + +[build-dependencies] +glob = "0.3.1" +syn = { version = "2.0.87", features = ["full", "parsing"] } +quote = "1.0.37" + +[dependencies] +libdd-common-ffi = { path = "../libdd-common-ffi" } +libdd-profiling-ffi = { path = "../libdd-profiling-ffi" } +libdd-crashtracker-ffi = { path = "../libdd-crashtracker-ffi" } +libdd-telemetry-ffi = { path = "../libdd-telemetry-ffi" } +libdd-data-pipeline-ffi = { path = "../libdd-data-pipeline-ffi" } +libdd-ddsketch-ffi = { path = "../libdd-ddsketch-ffi" } +libdd-library-config-ffi = { path = "../libdd-library-config-ffi" } +libdd-log-ffi = { path = "../libdd-log-ffi" } +datadog-ffe-ffi = { path = "../datadog-ffe-ffi" } +symbolizer-ffi = { path = "../symbolizer-ffi" } +libdd-shared-runtime-ffi = { 
path = "../libdd-shared-runtime-ffi" } diff --git a/size-benchmark/build.rs b/size-benchmark/build.rs new file mode 100644 index 0000000000..aa5989e019 --- /dev/null +++ b/size-benchmark/build.rs @@ -0,0 +1,366 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! build.rs for size-benchmark +//! +//! Scans all *-ffi/src/**/*.rs files in the workspace, finds +//! `#[no_mangle] pub [unsafe] extern "C" fn` signatures, and generates +//! `$OUT_DIR/calls.rs`. +//! +//! Generated file contains two parts: +//! +//! 1. **Callers** (functions with only ptr/primitive/Option params in public modules): Called with +//! zeroed/null args via qualified Rust path. These force crate linking and exercise the actual +//! function code paths. +//! +//! 2. **Symbol references** (all other discovered functions): Referenced via `extern "C"` +//! declarations (no-arg, no-return stub) and `let _ = fn_name as *const ()`. This forces the +//! linker to include the symbol without actually calling the function. Used for functions with +//! complex signatures or in private modules. +//! +//! Together, these ensure all discovered FFI symbols contribute to binary size. 
+ +use quote::ToTokens; +use std::collections::HashSet; +use std::fmt::Write as FmtWrite; +use std::fs; +use std::path::Path; +use syn::{FnArg, Item, ReturnType, Type}; + +/// (source_dir_relative_to_workspace, rust_crate_name) +const FFI_DIRS: &[(&str, &str)] = &[ + ("libdd-common-ffi/src", "libdd_common_ffi"), + ("libdd-profiling-ffi/src", "datadog_profiling_ffi"), + ("libdd-crashtracker-ffi/src", "libdd_crashtracker_ffi"), + ("libdd-telemetry-ffi/src", "libdd_telemetry_ffi"), + ("libdd-data-pipeline-ffi/src", "libdd_data_pipeline_ffi"), + ("libdd-ddsketch-ffi/src", "libdd_ddsketch_ffi"), + ("libdd-library-config-ffi/src", "libdd_library_config_ffi"), + ("libdd-log-ffi/src", "libdd_log_ffi"), + ("datadog-ffe-ffi/src", "datadog_ffe_ffi"), + ("symbolizer-ffi/src", "symbolizer_ffi"), + ("libdd-shared-runtime-ffi/src", "libdd_shared_runtime_ffi"), +]; + +fn main() { + for (dir, _) in FFI_DIRS { + println!("cargo:rerun-if-changed=../{dir}"); + } + + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); + let workspace_root = Path::new(&manifest_dir).parent().unwrap(); + + let mut functions: Vec = Vec::new(); + + for (dir, crate_name) in FFI_DIRS { + let src_dir = workspace_root.join(dir); + let pub_mods = collect_pub_mods(&src_dir.join("lib.rs")); + let pattern = format!("{}/**/*.rs", src_dir.display()); + for entry in glob::glob(&pattern).unwrap().flatten() { + if let Ok(source) = fs::read_to_string(&entry) { + let (is_accessible, module_prefix) = + resolve_module(&src_dir, entry.as_path(), crate_name, &pub_mods); + collect_ffi_functions(&source, &module_prefix, is_accessible, &mut functions); + } + } + } + + // Deduplicate by function name + functions.dedup_by(|a, b| a.name == b.name); + + let out_dir = std::env::var("OUT_DIR").unwrap(); + let out_path = Path::new(&out_dir).join("calls.rs"); + fs::write(&out_path, generate_code(&functions)).unwrap(); +} + +/// Returns the set of `pub mod` names declared in lib.rs. 
+fn collect_pub_mods(lib_rs: &Path) -> HashSet { + let mut mods = HashSet::new(); + let Ok(source) = fs::read_to_string(lib_rs) else { + return mods; + }; + let Ok(file) = syn::parse_file(&source) else { + return mods; + }; + for item in &file.items { + if let Item::Mod(m) = item { + if matches!(m.vis, syn::Visibility::Public(_)) { + mods.insert(m.ident.to_string()); + } + } + } + mods +} + +/// Returns (is_publicly_accessible, module_prefix). +/// is_publicly_accessible = true if functions can be called via the Rust module path. +fn resolve_module( + src_dir: &Path, + file: &Path, + crate_name: &str, + pub_mods: &HashSet, +) -> (bool, String) { + let rel = file.strip_prefix(src_dir).unwrap_or(file); + let without_ext = rel.with_extension(""); + let segments: Vec = without_ext + .components() + .filter_map(|c| c.as_os_str().to_str().map(String::from)) + .collect(); + + if segments.is_empty() || segments == ["lib"] { + return (true, crate_name.to_string()); + } + + let path_segs: Vec<&str> = if segments.last().map(|s| s.as_str()) == Some("mod") { + segments[..segments.len() - 1] + .iter() + .map(|s| s.as_str()) + .collect() + } else { + segments.iter().map(|s| s.as_str()).collect() + }; + + if path_segs.is_empty() { + return (true, crate_name.to_string()); + } + + let top_mod = path_segs[0]; + let accessible = pub_mods.contains(top_mod); + let prefix = format!("{}::{}", crate_name, path_segs.join("::")); + (accessible, prefix) +} + +struct FfiFunction { + /// Symbol name (C name) + name: String, + /// Rust module path (for callable functions), e.g. `libdd_ddsketch_ffi` + module_path: String, + /// Callable params; None → referenced by address only (complex/inaccessible) + params: Option>, + has_return: bool, +} + +/// A simplified, trivially-constructible parameter type. 
+#[derive(Clone, Copy)] +enum SimpleType { + Bool, + Int, + Float, + PtrMut, + PtrConst, + OptionNone, +} + +impl SimpleType { + fn zero_expr(self) -> &'static str { + match self { + SimpleType::Bool => "false", + SimpleType::Int => "0", + SimpleType::Float => "0.0", + SimpleType::PtrMut => "core::ptr::null_mut()", + SimpleType::PtrConst => "core::ptr::null()", + SimpleType::OptionNone => "None", + } + } +} + +fn has_no_mangle(attrs: &[syn::Attribute]) -> bool { + attrs.iter().any(|a| a.path().is_ident("no_mangle")) +} + +fn is_extern_c(abi: &Option) -> bool { + matches!(abi, Some(syn::Abi { name: Some(n), .. }) if n.value() == "C") +} + +/// Return true if any `#[cfg(...)]` attribute limits this item to a specific +/// target OS or platform that is NOT the current build target. +/// We conservatively skip items with `target_os = "windows"` on non-Windows. +fn is_platform_excluded(attrs: &[syn::Attribute]) -> bool { + let current_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + for attr in attrs { + if !attr.path().is_ident("cfg") { + continue; + } + // Check token stream for "windows" or specific target_os values + let tokens = attr.to_token_stream().to_string(); + if tokens.contains("windows") && current_os != "windows" { + return true; + } + if tokens.contains("target_os") && tokens.contains("\"windows\"") && current_os != "windows" + { + return true; + } + } + false +} + +fn collect_ffi_functions( + source: &str, + module_path: &str, + accessible: bool, + out: &mut Vec, +) { + let file = match syn::parse_file(source) { + Ok(f) => f, + Err(_) => return, + }; + + for item in &file.items { + let Item::Fn(f) = item else { continue }; + + if !matches!(f.vis, syn::Visibility::Public(_)) { + continue; + } + if !is_extern_c(&f.sig.abi) { + continue; + } + if !has_no_mangle(&f.attrs) { + continue; + } + if is_platform_excluded(&f.attrs) { + continue; + } + + let name = f.sig.ident.to_string(); + let has_return = !matches!(f.sig.output, 
ReturnType::Default); + + // Try to build callable params (only for accessible modules) + let params = if accessible { + let mut p: Vec = Vec::new(); + let mut skip = false; + for arg in &f.sig.inputs { + let FnArg::Typed(pat_type) = arg else { + continue; + }; + match simplify_type(&pat_type.ty) { + Some(st) => p.push(st), + None => { + skip = true; + break; + } + } + } + if skip { + None + } else { + Some(p) + } + } else { + None + }; + + out.push(FfiFunction { + name, + module_path: module_path.to_string(), + params, + has_return, + }); + } +} + +/// Map a syn::Type to a SimpleType, or None to skip this function. +fn simplify_type(ty: &Type) -> Option { + match ty { + Type::Ptr(p) => Some(if p.mutability.is_some() { + SimpleType::PtrMut + } else { + SimpleType::PtrConst + }), + // References require valid data — skip + Type::Reference(_) => None, + Type::BareFn(_) => Some(SimpleType::PtrMut), + Type::Path(tp) => { + if tp.qself.is_some() { + return None; + } + let last = tp.path.segments.last()?.ident.to_string(); + match last.as_str() { + "u8" | "u16" | "u32" | "u64" | "u128" | "usize" | "i8" | "i16" | "i32" | "i64" + | "i128" | "isize" | "c_int" | "c_uint" | "c_long" | "c_ulong" | "c_short" + | "c_ushort" | "c_char" | "c_schar" | "c_uchar" | "c_longlong" | "c_ulonglong" => { + Some(SimpleType::Int) + } + "f32" | "f64" | "c_float" | "c_double" => Some(SimpleType::Float), + "bool" => Some(SimpleType::Bool), + "Option" => Some(SimpleType::OptionNone), + _ => None, + } + } + _ => None, + } +} + +fn generate_code(functions: &[FfiFunction]) -> String { + let mut code = String::new(); + writeln!( + code, + "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT" + ) + .unwrap(); + writeln!(code).unwrap(); + + // Collect functions that need extern "C" stubs (not callable via Rust path) + let needs_extern: Vec<&FfiFunction> = functions.iter().filter(|f| f.params.is_none()).collect(); + + if !needs_extern.is_empty() { + writeln!(code, "extern \"C\" {{").unwrap(); + 
for f in &needs_extern { + // Declare with no params/return — we only take the address, not call it + writeln!(code, " fn {}();", f.name).unwrap(); + } + writeln!(code, "}}").unwrap(); + writeln!(code).unwrap(); + } + + writeln!(code, "pub fn exercise_all() {{").unwrap(); + writeln!(code, " #[allow(unused_unsafe)]").unwrap(); + writeln!(code, " unsafe {{").unwrap(); + + let mut called = 0u32; + let mut referenced = 0u32; + + for f in functions { + match &f.params { + Some(params) => { + // Call via qualified Rust path + let args: Vec = params + .iter() + .map(|st| format!("std::hint::black_box({})", st.zero_expr())) + .collect(); + let call = format!("{}::{}", f.module_path, f.name); + let invocation = format!("{}({})", call, args.join(", ")); + if f.has_return { + writeln!( + code, + " let _ = std::hint::black_box({});", + invocation + ) + .unwrap(); + } else { + writeln!(code, " {};", invocation).unwrap(); + } + called += 1; + } + None => { + // Reference via address-of to force linker inclusion + writeln!( + code, + " let _ = std::hint::black_box({} as *const ());", + f.name + ) + .unwrap(); + referenced += 1; + } + } + } + + writeln!(code, " }}").unwrap(); + writeln!(code, "}}").unwrap(); + writeln!(code).unwrap(); + writeln!( + code, + "// Coverage: {called} functions called (with args), {referenced} referenced (address-only)" + ) + .unwrap(); + code +} diff --git a/size-benchmark/src/main.rs b/size-benchmark/src/main.rs new file mode 100644 index 0000000000..a7146a2df7 --- /dev/null +++ b/size-benchmark/src/main.rs @@ -0,0 +1,19 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +// Force-link all FFI crates so their #[no_mangle] symbols are available to the +// extern "C" stubs in the generated calls.rs. 
+extern crate datadog_ffe_ffi; +extern crate datadog_profiling_ffi; +extern crate libdd_crashtracker_ffi; +extern crate libdd_data_pipeline_ffi; +extern crate libdd_shared_runtime_ffi; +extern crate libdd_telemetry_ffi; +extern crate symbolizer_ffi; + +include!(concat!(env!("OUT_DIR"), "/calls.rs")); + +fn main() { + exercise_all(); + println!("done"); +} From 2d37b73a472d628fde2dc03b97cc774244b3ff2f Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 14:41:27 +0200 Subject: [PATCH 02/15] feat(size-benchmark): simplify to pure function-pointer array approach --- size-benchmark/Cargo.toml | 1 - size-benchmark/build.rs | 416 +++++++------------------------------ size-benchmark/src/main.rs | 14 +- 3 files changed, 81 insertions(+), 350 deletions(-) diff --git a/size-benchmark/Cargo.toml b/size-benchmark/Cargo.toml index 0f2f376584..ede79b4bde 100644 --- a/size-benchmark/Cargo.toml +++ b/size-benchmark/Cargo.toml @@ -16,7 +16,6 @@ path = "src/main.rs" [build-dependencies] glob = "0.3.1" syn = { version = "2.0.87", features = ["full", "parsing"] } -quote = "1.0.37" [dependencies] libdd-common-ffi = { path = "../libdd-common-ffi" } diff --git a/size-benchmark/build.rs b/size-benchmark/build.rs index aa5989e019..98102c925a 100644 --- a/size-benchmark/build.rs +++ b/size-benchmark/build.rs @@ -1,366 +1,98 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -//! build.rs for size-benchmark +//! Scans all *-ffi/src/**/*.rs files, finds every `#[no_mangle] pub extern "C"` function, +//! and emits $OUT_DIR/fptrs.rs: //! -//! Scans all *-ffi/src/**/*.rs files in the workspace, finds -//! `#[no_mangle] pub [unsafe] extern "C" fn` signatures, and generates -//! `$OUT_DIR/calls.rs`. +//! extern "C" { fn ddog_foo(...); fn ddog_bar(...); ... } +//! static FPTRS: &[unsafe extern "C" fn()] = &[ddog_foo as _, ddog_bar as _, ...]; //! -//! Generated file contains two parts: -//! -//! 1. 
**Callers** (functions with only ptr/primitive/Option params in public modules): Called with -//! zeroed/null args via qualified Rust path. These force crate linking and exercise the actual -//! function code paths. -//! -//! 2. **Symbol references** (all other discovered functions): Referenced via `extern "C"` -//! declarations (no-arg, no-return stub) and `let _ = fn_name as *const ()`. This forces the -//! linker to include the symbol without actually calling the function. Used for functions with -//! complex signatures or in private modules. -//! -//! Together, these ensure all discovered FFI symbols contribute to binary size. +//! Storing every symbol in a non-dead static forces the linker to include every function +//! body (and its transitive call graph) in the final binary, which is what we want for +//! measuring realistic binary size after LTO. -use quote::ToTokens; -use std::collections::HashSet; -use std::fmt::Write as FmtWrite; -use std::fs; +use std::fmt::Write as _; use std::path::Path; -use syn::{FnArg, Item, ReturnType, Type}; - -/// (source_dir_relative_to_workspace, rust_crate_name) -const FFI_DIRS: &[(&str, &str)] = &[ - ("libdd-common-ffi/src", "libdd_common_ffi"), - ("libdd-profiling-ffi/src", "datadog_profiling_ffi"), - ("libdd-crashtracker-ffi/src", "libdd_crashtracker_ffi"), - ("libdd-telemetry-ffi/src", "libdd_telemetry_ffi"), - ("libdd-data-pipeline-ffi/src", "libdd_data_pipeline_ffi"), - ("libdd-ddsketch-ffi/src", "libdd_ddsketch_ffi"), - ("libdd-library-config-ffi/src", "libdd_library_config_ffi"), - ("libdd-log-ffi/src", "libdd_log_ffi"), - ("datadog-ffe-ffi/src", "datadog_ffe_ffi"), - ("symbolizer-ffi/src", "symbolizer_ffi"), - ("libdd-shared-runtime-ffi/src", "libdd_shared_runtime_ffi"), +use std::{env, fs}; +use syn::{Item, Visibility}; + +const FFI_DIRS: &[&str] = &[ + "libdd-common-ffi/src", + "libdd-profiling-ffi/src", + "libdd-crashtracker-ffi/src", + "libdd-telemetry-ffi/src", + "libdd-data-pipeline-ffi/src", + 
"libdd-ddsketch-ffi/src", + "libdd-library-config-ffi/src", + "libdd-log-ffi/src", + "datadog-ffe-ffi/src", + "symbolizer-ffi/src", + "libdd-shared-runtime-ffi/src", ]; fn main() { - for (dir, _) in FFI_DIRS { - println!("cargo:rerun-if-changed=../{dir}"); - } - - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").unwrap(); - let workspace_root = Path::new(&manifest_dir).parent().unwrap(); - - let mut functions: Vec = Vec::new(); - - for (dir, crate_name) in FFI_DIRS { - let src_dir = workspace_root.join(dir); - let pub_mods = collect_pub_mods(&src_dir.join("lib.rs")); - let pattern = format!("{}/**/*.rs", src_dir.display()); - for entry in glob::glob(&pattern).unwrap().flatten() { - if let Ok(source) = fs::read_to_string(&entry) { - let (is_accessible, module_prefix) = - resolve_module(&src_dir, entry.as_path(), crate_name, &pub_mods); - collect_ffi_functions(&source, &module_prefix, is_accessible, &mut functions); - } - } - } - - // Deduplicate by function name - functions.dedup_by(|a, b| a.name == b.name); - - let out_dir = std::env::var("OUT_DIR").unwrap(); - let out_path = Path::new(&out_dir).join("calls.rs"); - fs::write(&out_path, generate_code(&functions)).unwrap(); -} - -/// Returns the set of `pub mod` names declared in lib.rs. -fn collect_pub_mods(lib_rs: &Path) -> HashSet { - let mut mods = HashSet::new(); - let Ok(source) = fs::read_to_string(lib_rs) else { - return mods; - }; - let Ok(file) = syn::parse_file(&source) else { - return mods; - }; - for item in &file.items { - if let Item::Mod(m) = item { - if matches!(m.vis, syn::Visibility::Public(_)) { - mods.insert(m.ident.to_string()); - } - } - } - mods -} - -/// Returns (is_publicly_accessible, module_prefix). -/// is_publicly_accessible = true if functions can be called via the Rust module path. 
-fn resolve_module( - src_dir: &Path, - file: &Path, - crate_name: &str, - pub_mods: &HashSet, -) -> (bool, String) { - let rel = file.strip_prefix(src_dir).unwrap_or(file); - let without_ext = rel.with_extension(""); - let segments: Vec = without_ext - .components() - .filter_map(|c| c.as_os_str().to_str().map(String::from)) - .collect(); - - if segments.is_empty() || segments == ["lib"] { - return (true, crate_name.to_string()); - } - - let path_segs: Vec<&str> = if segments.last().map(|s| s.as_str()) == Some("mod") { - segments[..segments.len() - 1] - .iter() - .map(|s| s.as_str()) - .collect() - } else { - segments.iter().map(|s| s.as_str()).collect() - }; - - if path_segs.is_empty() { - return (true, crate_name.to_string()); - } - - let top_mod = path_segs[0]; - let accessible = pub_mods.contains(top_mod); - let prefix = format!("{}::{}", crate_name, path_segs.join("::")); - (accessible, prefix) -} - -struct FfiFunction { - /// Symbol name (C name) - name: String, - /// Rust module path (for callable functions), e.g. `libdd_ddsketch_ffi` - module_path: String, - /// Callable params; None → referenced by address only (complex/inaccessible) - params: Option>, - has_return: bool, -} - -/// A simplified, trivially-constructible parameter type. 
-#[derive(Clone, Copy)] -enum SimpleType { - Bool, - Int, - Float, - PtrMut, - PtrConst, - OptionNone, -} - -impl SimpleType { - fn zero_expr(self) -> &'static str { - match self { - SimpleType::Bool => "false", - SimpleType::Int => "0", - SimpleType::Float => "0.0", - SimpleType::PtrMut => "core::ptr::null_mut()", - SimpleType::PtrConst => "core::ptr::null()", - SimpleType::OptionNone => "None", - } - } -} - -fn has_no_mangle(attrs: &[syn::Attribute]) -> bool { - attrs.iter().any(|a| a.path().is_ident("no_mangle")) -} + let manifest = env::var("CARGO_MANIFEST_DIR").unwrap(); + let workspace = Path::new(&manifest).parent().unwrap(); + let current_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); -fn is_extern_c(abi: &Option) -> bool { - matches!(abi, Some(syn::Abi { name: Some(n), .. }) if n.value() == "C") -} - -/// Return true if any `#[cfg(...)]` attribute limits this item to a specific -/// target OS or platform that is NOT the current build target. -/// We conservatively skip items with `target_os = "windows"` on non-Windows. 
-fn is_platform_excluded(attrs: &[syn::Attribute]) -> bool { - let current_os = std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); - for attr in attrs { - if !attr.path().is_ident("cfg") { - continue; - } - // Check token stream for "windows" or specific target_os values - let tokens = attr.to_token_stream().to_string(); - if tokens.contains("windows") && current_os != "windows" { - return true; - } - if tokens.contains("target_os") && tokens.contains("\"windows\"") && current_os != "windows" - { - return true; - } - } - false -} - -fn collect_ffi_functions( - source: &str, - module_path: &str, - accessible: bool, - out: &mut Vec, -) { - let file = match syn::parse_file(source) { - Ok(f) => f, - Err(_) => return, - }; + let mut names: Vec = Vec::new(); - for item in &file.items { - let Item::Fn(f) = item else { continue }; - - if !matches!(f.vis, syn::Visibility::Public(_)) { - continue; - } - if !is_extern_c(&f.sig.abi) { - continue; - } - if !has_no_mangle(&f.attrs) { - continue; - } - if is_platform_excluded(&f.attrs) { - continue; - } - - let name = f.sig.ident.to_string(); - let has_return = !matches!(f.sig.output, ReturnType::Default); - - // Try to build callable params (only for accessible modules) - let params = if accessible { - let mut p: Vec = Vec::new(); - let mut skip = false; - for arg in &f.sig.inputs { - let FnArg::Typed(pat_type) = arg else { + for dir in FFI_DIRS { + println!("cargo:rerun-if-changed=../{dir}"); + let src = workspace.join(dir); + let pattern = format!("{}/**/*.rs", src.display()); + for path in glob::glob(&pattern).unwrap().flatten() { + let Ok(source) = fs::read_to_string(&path) else { + continue; + }; + let Ok(file) = syn::parse_file(&source) else { + continue; + }; + for item in &file.items { + let Item::Fn(f) = item else { continue }; + if !matches!(f.vis, Visibility::Public(_)) { continue; - }; - match simplify_type(&pat_type.ty) { - Some(st) => p.push(st), - None => { - skip = true; - break; - } } - } - if skip { - 
None - } else { - Some(p) - } - } else { - None - }; - - out.push(FfiFunction { - name, - module_path: module_path.to_string(), - params, - has_return, - }); - } -} - -/// Map a syn::Type to a SimpleType, or None to skip this function. -fn simplify_type(ty: &Type) -> Option { - match ty { - Type::Ptr(p) => Some(if p.mutability.is_some() { - SimpleType::PtrMut - } else { - SimpleType::PtrConst - }), - // References require valid data — skip - Type::Reference(_) => None, - Type::BareFn(_) => Some(SimpleType::PtrMut), - Type::Path(tp) => { - if tp.qself.is_some() { - return None; - } - let last = tp.path.segments.last()?.ident.to_string(); - match last.as_str() { - "u8" | "u16" | "u32" | "u64" | "u128" | "usize" | "i8" | "i16" | "i32" | "i64" - | "i128" | "isize" | "c_int" | "c_uint" | "c_long" | "c_ulong" | "c_short" - | "c_ushort" | "c_char" | "c_schar" | "c_uchar" | "c_longlong" | "c_ulonglong" => { - Some(SimpleType::Int) + let Some(abi) = &f.sig.abi else { continue }; + if !matches!(&abi.name, Some(n) if n.value() == "C") { + continue; } - "f32" | "f64" | "c_float" | "c_double" => Some(SimpleType::Float), - "bool" => Some(SimpleType::Bool), - "Option" => Some(SimpleType::OptionNone), - _ => None, + if !f.attrs.iter().any(|a| a.path().is_ident("no_mangle")) { + continue; + } + // Skip items gated to windows on non-windows builds + let is_windows_only = f.attrs.iter().any(|a| { + if !a.path().is_ident("cfg") { return false; } + let Ok(list) = a.meta.require_list() else { return false }; + list.tokens.to_string().contains("windows") + }); + if is_windows_only && current_os != "windows" { + continue; + } + names.push(f.sig.ident.to_string()); } } - _ => None, } -} -fn generate_code(functions: &[FfiFunction]) -> String { - let mut code = String::new(); - writeln!( - code, - "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT" - ) - .unwrap(); - writeln!(code).unwrap(); + names.sort(); + names.dedup(); - // Collect functions that need extern "C" stubs (not 
callable via Rust path) - let needs_extern: Vec<&FfiFunction> = functions.iter().filter(|f| f.params.is_none()).collect(); - - if !needs_extern.is_empty() { - writeln!(code, "extern \"C\" {{").unwrap(); - for f in &needs_extern { - // Declare with no params/return — we only take the address, not call it - writeln!(code, " fn {}();", f.name).unwrap(); - } - writeln!(code, "}}").unwrap(); - writeln!(code).unwrap(); + let mut out = String::new(); + writeln!(out, "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT").unwrap(); + writeln!(out, "extern \"C\" {{").unwrap(); + for name in &names { + writeln!(out, " fn {name}();").unwrap(); } - - writeln!(code, "pub fn exercise_all() {{").unwrap(); - writeln!(code, " #[allow(unused_unsafe)]").unwrap(); - writeln!(code, " unsafe {{").unwrap(); - - let mut called = 0u32; - let mut referenced = 0u32; - - for f in functions { - match &f.params { - Some(params) => { - // Call via qualified Rust path - let args: Vec = params - .iter() - .map(|st| format!("std::hint::black_box({})", st.zero_expr())) - .collect(); - let call = format!("{}::{}", f.module_path, f.name); - let invocation = format!("{}({})", call, args.join(", ")); - if f.has_return { - writeln!( - code, - " let _ = std::hint::black_box({});", - invocation - ) - .unwrap(); - } else { - writeln!(code, " {};", invocation).unwrap(); - } - called += 1; - } - None => { - // Reference via address-of to force linker inclusion - writeln!( - code, - " let _ = std::hint::black_box({} as *const ());", - f.name - ) - .unwrap(); - referenced += 1; - } - } + writeln!(out, "}}").unwrap(); + writeln!(out).unwrap(); + writeln!(out, "#[used]").unwrap(); + writeln!(out, "static FPTRS: &[unsafe extern \"C\" fn()] = &[").unwrap(); + for name in &names { + writeln!(out, " {name} as _,").unwrap(); } + writeln!(out, "];").unwrap(); + writeln!(out, "// {} symbols", names.len()).unwrap(); - writeln!(code, " }}").unwrap(); - writeln!(code, "}}").unwrap(); - writeln!(code).unwrap(); - 
writeln!( - code, - "// Coverage: {called} functions called (with args), {referenced} referenced (address-only)" - ) - .unwrap(); - code + let out_dir = env::var("OUT_DIR").unwrap(); + fs::write(Path::new(&out_dir).join("fptrs.rs"), out).unwrap(); } diff --git a/size-benchmark/src/main.rs b/size-benchmark/src/main.rs index a7146a2df7..19a654057d 100644 --- a/size-benchmark/src/main.rs +++ b/size-benchmark/src/main.rs @@ -1,19 +1,19 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -// Force-link all FFI crates so their #[no_mangle] symbols are available to the -// extern "C" stubs in the generated calls.rs. +// Pull in all FFI crates so their symbols are available to the extern "C" block below. extern crate datadog_ffe_ffi; extern crate datadog_profiling_ffi; +extern crate libdd_common_ffi; extern crate libdd_crashtracker_ffi; extern crate libdd_data_pipeline_ffi; +extern crate libdd_ddsketch_ffi; +extern crate libdd_library_config_ffi; +extern crate libdd_log_ffi; extern crate libdd_shared_runtime_ffi; extern crate libdd_telemetry_ffi; extern crate symbolizer_ffi; -include!(concat!(env!("OUT_DIR"), "/calls.rs")); +include!(concat!(env!("OUT_DIR"), "/fptrs.rs")); -fn main() { - exercise_all(); - println!("done"); -} +fn main() {} From 8578d97efb43a587b8341e403e3d219bf0457a63 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 14:52:02 +0200 Subject: [PATCH 03/15] feat(size-benchmark): add size-optimized build script for CI --- .cargo/config.toml | 1 + Cargo.lock | 1 - size-benchmark/build-size-optimized.sh | 36 ++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100755 size-benchmark/build-size-optimized.sh diff --git a/.cargo/config.toml b/.cargo/config.toml index 96db79f3cb..eb03b08b51 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -9,3 +9,4 @@ # 2. Compile the C/C++ examples using CMake # 3. 
Run each example and report pass/fail ffi-test = "run --package tools --bin ffi_test --" + diff --git a/Cargo.lock b/Cargo.lock index 67519e990f..34997ba713 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5296,7 +5296,6 @@ dependencies = [ "libdd-profiling-ffi", "libdd-shared-runtime-ffi", "libdd-telemetry-ffi", - "quote", "symbolizer-ffi", "syn 2.0.87", ] diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh new file mode 100755 index 0000000000..fa1de402ac --- /dev/null +++ b/size-benchmark/build-size-optimized.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Build the size-benchmark binary with the same aggressive size optimizations +# that our most critical users apply, so the measured size is representative. +# +# Requires: +# - rustup with nightly toolchain +# - aarch64-unknown-linux-musl target installed +# - aarch64-linux-musl-gcc (or equivalent cross linker) on PATH +# +# Usage: ./size-benchmark/build-size-optimized.sh [extra cargo args] +# Output: prints the binary size in bytes on stdout (last line) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WORKSPACE_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +TARGET=aarch64-unknown-linux-musl + +RUSTFLAGS="\ + -Zunstable-options \ + -Cpanic=immediate-abort \ + -Zlocation-detail=none \ + -Zfmt-debug=none \ +" \ +cargo +nightly build \ + -Z build-std=std,panic_abort \ + -Z build-std-features= \ + --target "$TARGET" \ + --release \ + -p size-benchmark \ + --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \ + "$@" + +BINARY="$WORKSPACE_ROOT/target/$TARGET/release/size-benchmark" +wc -c < "$BINARY" From 330c912122a80ef7969cea4a84d06cf80b85883a Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 14:58:31 +0200 Subject: [PATCH 04/15] fix(size-benchmark): use native arch target, avoid cross-compilation --- size-benchmark/build-size-optimized.sh | 22 ++++++++++++++++------ size-benchmark/build.rs | 14 +++++++++++--- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index fa1de402ac..9c7230e372 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -2,20 +2,30 @@ # Build the size-benchmark binary with the same aggressive size optimizations # that our most critical users apply, so the measured size is representative. # -# Requires: -# - rustup with nightly toolchain -# - aarch64-unknown-linux-musl target installed -# - aarch64-linux-musl-gcc (or equivalent cross linker) on PATH +# On Linux → builds for {host-arch}-unknown-linux-musl (static, musl libc) +# On macOS → builds for the native Darwin target (no musl available on macOS) +# +# Requires: rustup with nightly toolchain + the resolved target installed. +# On Linux the musl target also needs a musl C toolchain (e.g. musl-tools package). 
# # Usage: ./size-benchmark/build-size-optimized.sh [extra cargo args] -# Output: prints the binary size in bytes on stdout (last line) +# Output: binary size in bytes on stdout (last line) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" WORKSPACE_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -TARGET=aarch64-unknown-linux-musl +ARCH="$(uname -m | sed 's/arm64/aarch64/')" +OS="$(uname -s)" + +case "$OS" in + Linux) TARGET="${ARCH}-unknown-linux-musl" ;; + Darwin) TARGET="${ARCH}-apple-darwin" ;; + *) echo "Unsupported OS: $OS" >&2; exit 1 ;; +esac + +rustup target add "$TARGET" --toolchain nightly 2>/dev/null || true RUSTFLAGS="\ -Zunstable-options \ diff --git a/size-benchmark/build.rs b/size-benchmark/build.rs index 98102c925a..2a674b5e9b 100644 --- a/size-benchmark/build.rs +++ b/size-benchmark/build.rs @@ -62,8 +62,12 @@ fn main() { } // Skip items gated to windows on non-windows builds let is_windows_only = f.attrs.iter().any(|a| { - if !a.path().is_ident("cfg") { return false; } - let Ok(list) = a.meta.require_list() else { return false }; + if !a.path().is_ident("cfg") { + return false; + } + let Ok(list) = a.meta.require_list() else { + return false; + }; list.tokens.to_string().contains("windows") }); if is_windows_only && current_os != "windows" { @@ -78,7 +82,11 @@ fn main() { names.dedup(); let mut out = String::new(); - writeln!(out, "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT").unwrap(); + writeln!( + out, + "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT" + ) + .unwrap(); writeln!(out, "extern \"C\" {{").unwrap(); for name in &names { writeln!(out, " fn {name}();").unwrap(); From b8efdce4de863102de2b1d7fe45de226261050af Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:03:18 +0200 Subject: [PATCH 05/15] feat(size-benchmark): add release-size profile with opt-level=z, strip, panic=abort --- Cargo.toml | 8 ++++++++ size-benchmark/build-size-optimized.sh | 4 ++-- 2 files changed, 10 
insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index af1a0636e6..c18d1cc40e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,6 +102,14 @@ debug = false incremental = false opt-level = 3 +# Profile used exclusively by the size-benchmark crate. +# Inherits release then tightens every size knob that cannot be set per-package. +[profile.release-size] +inherits = "release" +opt-level = "z" # "z" vs "s": skip loop vectorization too +strip = true +panic = "abort" + # https://camshaft.github.io/bolero/library-installation.html [profile.fuzz] inherits = "dev" diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index 9c7230e372..0776524062 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -37,10 +37,10 @@ cargo +nightly build \ -Z build-std=std,panic_abort \ -Z build-std-features= \ --target "$TARGET" \ - --release \ + --profile release-size \ -p size-benchmark \ --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \ "$@" -BINARY="$WORKSPACE_ROOT/target/$TARGET/release/size-benchmark" +BINARY="$WORKSPACE_ROOT/target/$TARGET/release-size/size-benchmark" wc -c < "$BINARY" From 0089d2b72680de5d699f9746150958e3b708fc9e Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:17:07 +0200 Subject: [PATCH 06/15] fix(size-benchmark): use build-std-features=optimize_for_size --- size-benchmark/build-size-optimized.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index 0776524062..f512f0f6ad 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -35,7 +35,7 @@ RUSTFLAGS="\ " \ cargo +nightly build \ -Z build-std=std,panic_abort \ - -Z build-std-features= \ + -Z build-std-features=optimize_for_size \ --target "$TARGET" \ --profile release-size \ -p size-benchmark \ From 1a789585a1206488099ccc3ecb82a249ad53eff2 Mon Sep 17 
00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:28:56 +0200 Subject: [PATCH 07/15] feat(size-benchmark): add compare-size.sh for PR binary size diffing --- size-benchmark/build-size-optimized.sh | 2 +- size-benchmark/cargo-bloat-optimized.sh | 43 ++++++++++ size-benchmark/compare-size.sh | 101 ++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 1 deletion(-) create mode 100755 size-benchmark/cargo-bloat-optimized.sh create mode 100755 size-benchmark/compare-size.sh diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index f512f0f6ad..82324c3909 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -25,7 +25,7 @@ case "$OS" in *) echo "Unsupported OS: $OS" >&2; exit 1 ;; esac -rustup target add "$TARGET" --toolchain nightly 2>/dev/null || true +rustup target add "$TARGET" --toolchain nightly >/dev/null 2>&1 || true RUSTFLAGS="\ -Zunstable-options \ diff --git a/size-benchmark/cargo-bloat-optimized.sh b/size-benchmark/cargo-bloat-optimized.sh new file mode 100755 index 0000000000..56fac9386b --- /dev/null +++ b/size-benchmark/cargo-bloat-optimized.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Run cargo-bloat on the size-benchmark binary with the same aggressive size optimizations +# that our most critical users apply, so the reported breakdown is representative. +# +# On Linux → builds for {host-arch}-unknown-linux-musl (static, musl libc) +# On macOS → builds for the native Darwin target (no musl available on macOS) +# +# Requires: rustup with nightly toolchain + the resolved target installed, plus the cargo-bloat subcommand. +# On Linux the musl target also needs a musl C toolchain (e.g. musl-tools package). +# +# Usage: ./size-benchmark/cargo-bloat-optimized.sh [extra cargo-bloat args] +# Output: cargo-bloat's size report on stdout +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WORKSPACE_ROOT="$(cd "$SCRIPT_DIR/.."
&& pwd)" + +ARCH="$(uname -m | sed 's/arm64/aarch64/')" +OS="$(uname -s)" + +case "$OS" in + Linux) TARGET="${ARCH}-unknown-linux-musl" ;; + Darwin) TARGET="${ARCH}-apple-darwin" ;; + *) echo "Unsupported OS: $OS" >&2; exit 1 ;; +esac + +rustup target add "$TARGET" --toolchain nightly 2>/dev/null || true + +RUSTFLAGS="\ + -Zunstable-options \ + -Cpanic=immediate-abort \ + -Zlocation-detail=none \ + -Zfmt-debug=none \ +" \ +cargo +nightly bloat \ + -Z build-std=std,panic_abort \ + -Z build-std-features= \ + --target "$TARGET" \ + --profile release-size \ + -p size-benchmark \ + --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \ + "$@" diff --git a/size-benchmark/compare-size.sh b/size-benchmark/compare-size.sh new file mode 100755 index 0000000000..48a3c9a3d9 --- /dev/null +++ b/size-benchmark/compare-size.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Compare binary size of the size-benchmark between two git refs. +# +# Usage: +# ./size-benchmark/compare-size.sh --base <ref> --head <ref> [--output <file>] +# +# Output: markdown table printed to stdout (and optionally to --output file). + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +BASE_REF="" +HEAD_REF="" +OUTPUT_FILE="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --base) BASE_REF="$2"; shift 2 ;; + --head) HEAD_REF="$2"; shift 2 ;; + --output) OUTPUT_FILE="$2"; shift 2 ;; + *) echo "Unknown argument: $1" >&2; exit 1 ;; + esac +done + +if [[ -z "$BASE_REF" || -z "$HEAD_REF" ]]; then + echo "Usage: $0 --base <ref> --head <ref> [--output <file>]" >&2 + exit 1 +fi + +format_bytes() { + local b=$1 + if [[ $b -lt 1024 ]]; then echo "${b} B" + elif [[ $b -lt $((1024*1024)) ]]; then printf "%.2f KB\n" "$(echo "scale=4; $b/1024" | bc)" + else printf "%.2f MB\n" "$(echo "scale=4; $b/1024/1024" | bc)" + fi +} + +# Build a ref in a temporary worktree, print byte count to stdout.
+build_ref() { + local ref="$1" + local label="$2" + local worktree + worktree="$(mktemp -d)" + + echo "Building $label ($(git -C "$REPO_ROOT" rev-parse --short "$ref"))…" >&2 + + git -C "$REPO_ROOT" worktree add --detach "$worktree" "$ref" 2>&1 | sed 's/^/ /' >&2 + + # cargo writes to stderr; wc -c is the only stdout line. + # Redirect build stderr → our stderr so CI logs show progress. + bash "$worktree/size-benchmark/build-size-optimized.sh" 2>&3 + # (stdout = byte count, captured by the caller via $()) + + git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true + rm -rf "$worktree" +} + +BASE_SHORT="$(git -C "$REPO_ROOT" rev-parse --short "$BASE_REF")" +HEAD_SHORT="$(git -C "$REPO_ROOT" rev-parse --short "$HEAD_REF")" + +BASE_BYTES="$(build_ref "$BASE_REF" "base" 3>&2)" +HEAD_BYTES="$(build_ref "$HEAD_REF" "head" 3>&2)" + +DIFF=$(( HEAD_BYTES - BASE_BYTES )) +DIFF_ABS=${DIFF#-} +[[ $DIFF -ge 0 ]] && SIGN="+" || SIGN="-" + +PCT="$(echo "scale=2; $DIFF * 100 / $BASE_BYTES" | bc)" +PCT_ABS="$(echo "$PCT" | sed 's/^-//')" + +BASE_FMT="$(format_bytes "$BASE_BYTES")" +HEAD_FMT="$(format_bytes "$HEAD_BYTES")" +DIFF_FMT="$(format_bytes "$DIFF_ABS")" + +THRESHOLD=2 +if (( $(echo "$PCT < -$THRESHOLD" | bc -l) )); then EMOJI="🎉" # significantly smaller +elif (( $(echo "$PCT < 0" | bc -l) )); then EMOJI="✅" # smaller, within noise +elif (( $(echo "$PCT == 0" | bc -l) )); then EMOJI="➡️" # unchanged +elif (( $(echo "$PCT <= $THRESHOLD" | bc -l) )); then EMOJI="➡️" # larger, within noise +elif (( $(echo "$PCT <= 10" | bc -l) )); then EMOJI="⚠️" # notable regression +else EMOJI="🚨" # large regression +fi + +TABLE="$(cat <&2 +echo "$TABLE" + +if [[ -n "$OUTPUT_FILE" ]]; then + echo "$TABLE" > "$OUTPUT_FILE" + echo "Written to $OUTPUT_FILE" >&2 +fi From a7cd730a03635893e9d7adb144fe7d8bf4ba2a43 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:33:22 +0200 Subject: [PATCH 08/15] fix(size-benchmark): share cargo target dir between base 
and head builds --- size-benchmark/build-size-optimized.sh | 3 ++- size-benchmark/compare-size.sh | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index 82324c3909..3180da9eab 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -42,5 +42,6 @@ cargo +nightly build \ --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \ "$@" -BINARY="$WORKSPACE_ROOT/target/$TARGET/release-size/size-benchmark" +TARGET_DIR="${CARGO_TARGET_DIR:-$WORKSPACE_ROOT/target}" +BINARY="$TARGET_DIR/$TARGET/release-size/size-benchmark" wc -c < "$BINARY" diff --git a/size-benchmark/compare-size.sh b/size-benchmark/compare-size.sh index 48a3c9a3d9..8edcf388c0 100755 --- a/size-benchmark/compare-size.sh +++ b/size-benchmark/compare-size.sh @@ -49,8 +49,10 @@ build_ref() { git -C "$REPO_ROOT" worktree add --detach "$worktree" "$ref" 2>&1 | sed 's/^/ /' >&2 # cargo writes to stderr; wc -c is the only stdout line. + # Point CARGO_TARGET_DIR at the main worktree so both builds share the cache. # Redirect build stderr → our stderr so CI logs show progress. 
- bash "$worktree/size-benchmark/build-size-optimized.sh" 2>&3 + CARGO_TARGET_DIR="$REPO_ROOT/target" \ + bash "$worktree/size-benchmark/build-size-optimized.sh" 2>&3 # (stdout = byte count, captured by the caller via $()) git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true From e3be70c3ffd1cda75f3b169e75aebbf40bbb7ef7 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:34:14 +0200 Subject: [PATCH 09/15] ci: add binary size comparison workflow for pull requests --- .github/workflows/pr-binary-size.yml | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/pr-binary-size.yml diff --git a/.github/workflows/pr-binary-size.yml b/.github/workflows/pr-binary-size.yml new file mode 100644 index 0000000000..b77a579c06 --- /dev/null +++ b/.github/workflows/pr-binary-size.yml @@ -0,0 +1,37 @@ +name: Binary Size + +on: + pull_request: + +jobs: + binary-size: + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 # need full history to check out base ref + + - name: Install nightly + rust-src + musl target + run: | + rustup toolchain install nightly --component rust-src + rustup target add x86_64-unknown-linux-musl --toolchain nightly + + - name: Install musl linker + run: sudo apt-get install -y musl-tools + + - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1 + with: + cache-targets: true + + - name: Compare binary size + run: | + bash size-benchmark/compare-size.sh \ + --base ${{ github.event.pull_request.base.sha }} \ + --head ${{ github.sha }} \ + --output size-report.md + + - uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4 + with: + path: size-report.md From c934b2f2acdb653170d2f2f7f55284f07102fbaf Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 15:58:38 +0200 
Subject: [PATCH 10/15] fix(ci): use enterprise-allowed action SHAs, replace sticky comment with github-script --- .github/workflows/pr-binary-size.yml | 34 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-binary-size.yml b/.github/workflows/pr-binary-size.yml index b77a579c06..f399f68292 100644 --- a/.github/workflows/pr-binary-size.yml +++ b/.github/workflows/pr-binary-size.yml @@ -9,7 +9,7 @@ jobs: permissions: pull-requests: write steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 # need full history to check out base ref @@ -21,7 +21,7 @@ jobs: - name: Install musl linker run: sudo apt-get install -y musl-tools - - uses: Swatinem/rust-cache@23869a5bd66c73db3c0ac40331f3206eb23791dc # v2.9.1 + - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 with: cache-targets: true @@ -32,6 +32,32 @@ jobs: --head ${{ github.sha }} \ --output size-report.md - - uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4 + - name: Post PR comment + uses: actions/github-script@d746ffe35508b1917358783b479e04febd2b8f71 # v9.0.0 with: - path: size-report.md + script: | + const fs = require('fs'); + const body = fs.readFileSync('size-report.md', 'utf8'); + const marker = '<!-- binary-size-report -->'; + const comments = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + const existing = comments.data.find(c => c.body.includes(marker)); + const fullBody = marker + '\n' + body; + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body: fullBody, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number:
context.issue.number, + body: fullBody, + }); + } From a61cce3692f3e69dbd2ef20f88ade06cfe3baa83 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 16:04:39 +0200 Subject: [PATCH 11/15] fix(size-benchmark): always use head's build script, support WORKSPACE_ROOT override --- size-benchmark/build-size-optimized.sh | 2 +- size-benchmark/compare-size.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index 3180da9eab..67d5b39878 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -14,7 +14,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -WORKSPACE_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +WORKSPACE_ROOT="${WORKSPACE_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}" ARCH="$(uname -m | sed 's/arm64/aarch64/')" OS="$(uname -s)" diff --git a/size-benchmark/compare-size.sh b/size-benchmark/compare-size.sh index 8edcf388c0..15822191ae 100755 --- a/size-benchmark/compare-size.sh +++ b/size-benchmark/compare-size.sh @@ -10,6 +10,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +BUILD_SCRIPT="$SCRIPT_DIR/build-size-optimized.sh" BASE_REF="" HEAD_REF="" @@ -49,10 +50,13 @@ build_ref() { git -C "$REPO_ROOT" worktree add --detach "$worktree" "$ref" 2>&1 | sed 's/^/ /' >&2 # cargo writes to stderr; wc -c is the only stdout line. + # Always use the script from the current checkout (base may not have it). + # Override WORKSPACE_ROOT so the script builds the worktree, not itself. # Point CARGO_TARGET_DIR at the main worktree so both builds share the cache. # Redirect build stderr → our stderr so CI logs show progress. 
CARGO_TARGET_DIR="$REPO_ROOT/target" \ - bash "$worktree/size-benchmark/build-size-optimized.sh" 2>&3 + WORKSPACE_ROOT="$worktree" \ + bash "$BUILD_SCRIPT" 2>&3 # (stdout = byte count, captured by the caller via $()) git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true From 13436ccc5e0018a602b1e0ff425ccffaac01e9a1 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 16:09:02 +0200 Subject: [PATCH 12/15] fix(size-benchmark): stable worktree paths for cache hits, merge-base for base ref --- .github/workflows/pr-binary-size.yml | 4 +++- .gitignore | 1 + size-benchmark/compare-size.sh | 16 ++++++++-------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pr-binary-size.yml b/.github/workflows/pr-binary-size.yml index f399f68292..6fa40743e1 100644 --- a/.github/workflows/pr-binary-size.yml +++ b/.github/workflows/pr-binary-size.yml @@ -24,11 +24,13 @@ jobs: - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 with: cache-targets: true + prefix-key: v0-rust-binary-size - name: Compare binary size run: | + BASE=$(git merge-base origin/main HEAD) bash size-benchmark/compare-size.sh \ - --base ${{ github.event.pull_request.base.sha }} \ + --base "$BASE" \ --head ${{ github.sha }} \ --output size-report.md diff --git a/.gitignore b/.gitignore index 5a4edd14ce..8ec220b366 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ examples/cxx/exporter_manager.exe examples/cxx/profiling examples/cxx/profiling.exe profile.pprof +.worktree-size-* diff --git a/size-benchmark/compare-size.sh b/size-benchmark/compare-size.sh index 15822191ae..a01133aa15 100755 --- a/size-benchmark/compare-size.sh +++ b/size-benchmark/compare-size.sh @@ -38,26 +38,26 @@ format_bytes() { fi } -# Build a ref in a temporary worktree, print byte count to stdout. 
+# Build a ref in a worktree placed inside the repo root so that Cargo's +# path-based fingerprints are stable across runs (no /tmp/tmp.xxx/ variance). build_ref() { local ref="$1" local label="$2" - local worktree - worktree="$(mktemp -d)" + local short + short="$(git -C "$REPO_ROOT" rev-parse --short "$ref")" + local worktree="$REPO_ROOT/.worktree-size-$label" - echo "Building $label ($(git -C "$REPO_ROOT" rev-parse --short "$ref"))…" >&2 + echo "Building $label ($short)…" >&2 git -C "$REPO_ROOT" worktree add --detach "$worktree" "$ref" 2>&1 | sed 's/^/ /' >&2 # cargo writes to stderr; wc -c is the only stdout line. # Always use the script from the current checkout (base may not have it). - # Override WORKSPACE_ROOT so the script builds the worktree, not itself. - # Point CARGO_TARGET_DIR at the main worktree so both builds share the cache. - # Redirect build stderr → our stderr so CI logs show progress. + # Override WORKSPACE_ROOT so the script builds the worktree's sources. + # CARGO_TARGET_DIR is fixed so both refs share the same build cache. 
CARGO_TARGET_DIR="$REPO_ROOT/target" \ WORKSPACE_ROOT="$worktree" \ bash "$BUILD_SCRIPT" 2>&3 - # (stdout = byte count, captured by the caller via $()) git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true rm -rf "$worktree" From dd618ff48ccc3d67bd04010ccbff50d3eb4cff67 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 16:13:47 +0200 Subject: [PATCH 13/15] fix(size-benchmark): handle missing base crate gracefully, improve cache config --- .github/workflows/pr-binary-size.yml | 4 +- size-benchmark/compare-size.sh | 59 +++++++++++++++++++--------- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/.github/workflows/pr-binary-size.yml b/.github/workflows/pr-binary-size.yml index 6fa40743e1..9f92d69184 100644 --- a/.github/workflows/pr-binary-size.yml +++ b/.github/workflows/pr-binary-size.yml @@ -23,8 +23,10 @@ jobs: - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 with: - cache-targets: true prefix-key: v0-rust-binary-size + cache-targets: true + cache-on-failure: true + workspaces: ". -> target" - name: Compare binary size run: | diff --git a/size-benchmark/compare-size.sh b/size-benchmark/compare-size.sh index a01133aa15..b665955231 100755 --- a/size-benchmark/compare-size.sh +++ b/size-benchmark/compare-size.sh @@ -55,6 +55,15 @@ build_ref() { # Always use the script from the current checkout (base may not have it). # Override WORKSPACE_ROOT so the script builds the worktree's sources. # CARGO_TARGET_DIR is fixed so both refs share the same build cache. + # If this ref predates the size-benchmark crate, return 0 (not an error). + if ! 
grep -q '"size-benchmark"' "$worktree/Cargo.toml" 2>/dev/null; then + echo " (size-benchmark not present on $label, skipping)" >&2 + git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true + rm -rf "$worktree" + echo "0" + return + fi + CARGO_TARGET_DIR="$REPO_ROOT/target" \ WORKSPACE_ROOT="$worktree" \ bash "$BUILD_SCRIPT" 2>&3 @@ -69,27 +78,38 @@ HEAD_SHORT="$(git -C "$REPO_ROOT" rev-parse --short "$HEAD_REF")" BASE_BYTES="$(build_ref "$BASE_REF" "base" 3>&2)" HEAD_BYTES="$(build_ref "$HEAD_REF" "head" 3>&2)" -DIFF=$(( HEAD_BYTES - BASE_BYTES )) -DIFF_ABS=${DIFF#-} -[[ $DIFF -ge 0 ]] && SIGN="+" || SIGN="-" - -PCT="$(echo "scale=2; $DIFF * 100 / $BASE_BYTES" | bc)" -PCT_ABS="$(echo "$PCT" | sed 's/^-//')" - -BASE_FMT="$(format_bytes "$BASE_BYTES")" HEAD_FMT="$(format_bytes "$HEAD_BYTES")" -DIFF_FMT="$(format_bytes "$DIFF_ABS")" - -THRESHOLD=2 -if (( $(echo "$PCT < -$THRESHOLD" | bc -l) )); then EMOJI="🎉" # significantly smaller -elif (( $(echo "$PCT < 0" | bc -l) )); then EMOJI="✅" # smaller, within noise -elif (( $(echo "$PCT == 0" | bc -l) )); then EMOJI="➡️" # unchanged -elif (( $(echo "$PCT <= $THRESHOLD" | bc -l) )); then EMOJI="➡️" # larger, within noise -elif (( $(echo "$PCT <= 10" | bc -l) )); then EMOJI="⚠️" # notable regression -else EMOJI="🚨" # large regression -fi -TABLE="$(cat <&2 echo "$TABLE" From f95d8818c73e72cdf8e04c28e64b92456bfd2ec3 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 16:17:32 +0200 Subject: [PATCH 14/15] fix(size-benchmark): revert to build-std-features= to strip backtrace, fix musl link --- size-benchmark/build-size-optimized.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index 67d5b39878..ed59875ef5 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -35,7 +35,7 @@ RUSTFLAGS="\ " \ cargo +nightly build \ -Z build-std=std,panic_abort \ - -Z 
build-std-features=optimize_for_size \ + -Z build-std-features= \ --target "$TARGET" \ --profile release-size \ -p size-benchmark \ From 64b1f60aae48f703d3474abb8b21941cd7033a33 Mon Sep 17 00:00:00 2001 From: Oscar Le Dauphin Date: Fri, 24 Apr 2026 16:21:28 +0200 Subject: [PATCH 15/15] fix(size-benchmark): use linux-gnu target on Linux, musl incompatible with workspace C libs --- .github/workflows/pr-binary-size.yml | 9 ++------- size-benchmark/build-size-optimized.sh | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pr-binary-size.yml b/.github/workflows/pr-binary-size.yml index 9f92d69184..aed4573207 100644 --- a/.github/workflows/pr-binary-size.yml +++ b/.github/workflows/pr-binary-size.yml @@ -13,13 +13,8 @@ jobs: with: fetch-depth: 0 # need full history to check out base ref - - name: Install nightly + rust-src + musl target - run: | - rustup toolchain install nightly --component rust-src - rustup target add x86_64-unknown-linux-musl --toolchain nightly - - - name: Install musl linker - run: sudo apt-get install -y musl-tools + - name: Install nightly + rust-src + run: rustup toolchain install nightly --component rust-src - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1 with: diff --git a/size-benchmark/build-size-optimized.sh b/size-benchmark/build-size-optimized.sh index ed59875ef5..ec79fbdc81 100755 --- a/size-benchmark/build-size-optimized.sh +++ b/size-benchmark/build-size-optimized.sh @@ -20,7 +20,7 @@ ARCH="$(uname -m | sed 's/arm64/aarch64/')" OS="$(uname -s)" case "$OS" in - Linux) TARGET="${ARCH}-unknown-linux-musl" ;; + Linux) TARGET="${ARCH}-unknown-linux-gnu" ;; Darwin) TARGET="${ARCH}-apple-darwin" ;; *) echo "Unsupported OS: $OS" >&2; exit 1 ;; esac