From c1f854ff40f140eba4e19e5bb7a71f0c9147bbe5 Mon Sep 17 00:00:00 2001 From: fastio Date: Mon, 2 Mar 2026 18:12:31 +0800 Subject: [PATCH 1/7] feat: add clickhouse-bench with auto-downloaded ClickHouse binary Introduce a new clickhouse-bench benchmark crate that runs ClickBench queries against Parquet data via clickhouse-local, providing a baseline for comparing Vortex performance against ClickHouse. Key design decisions: - build.rs auto-downloads the full ClickHouse binary (with Parquet support) into target/clickhouse-local/, similar to how vortex-duckdb downloads the DuckDB library. This eliminates manual install steps and avoids issues with slim/homebrew builds lacking Parquet support. - The binary path is baked in via CLICKHOUSE_BINARY env at compile time; setting the CLICKHOUSE_LOCAL env var at build time overrides the path and skips the download. - ClickHouse-dialect SQL queries are maintained in a separate clickbench_clickhouse_queries.sql file (43 queries). - CI workflows updated to include clickhouse:parquet target in ClickBench benchmarks and conditionally build clickhouse-bench. 
Signed-off-by: fastio --- .github/scripts/run-sql-bench.sh | 20 +- .github/workflows/bench.yml | 2 +- .github/workflows/sql-benchmarks.yml | 5 +- Cargo.lock | 12 + Cargo.toml | 1 + benchmarks/clickhouse-bench/Cargo.toml | 25 ++ benchmarks/clickhouse-bench/build.rs | 117 ++++++++++ benchmarks/clickhouse-bench/src/lib.rs | 216 ++++++++++++++++++ benchmarks/clickhouse-bench/src/main.rs | 105 +++++++++ .../clickbench_clickhouse_queries.sql | 43 ++++ vortex-bench/src/clickbench/benchmark.rs | 30 ++- vortex-bench/src/lib.rs | 4 + 12 files changed, 570 insertions(+), 10 deletions(-) create mode 100644 benchmarks/clickhouse-bench/Cargo.toml create mode 100644 benchmarks/clickhouse-bench/build.rs create mode 100644 benchmarks/clickhouse-bench/src/lib.rs create mode 100644 benchmarks/clickhouse-bench/src/main.rs create mode 100644 vortex-bench/clickbench_clickhouse_queries.sql diff --git a/.github/scripts/run-sql-bench.sh b/.github/scripts/run-sql-bench.sh index 93e96cb89dd..73d2a26d962 100755 --- a/.github/scripts/run-sql-bench.sh +++ b/.github/scripts/run-sql-bench.sh @@ -2,8 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright the Vortex contributors # -# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench) for the given targets. -# This script is used by the sql-benchmarks.yml workflow. +# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench, clickhouse-bench) +# for the given targets. This script is used by the sql-benchmarks.yml workflow. 
# # Usage: # run-sql-bench.sh [options] @@ -11,12 +11,12 @@ # Arguments: # subcommand The benchmark subcommand (e.g., tpch, clickbench, tpcds) # targets Comma-separated list of engine:format pairs -# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet") +# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet,clickhouse:parquet") # # Options: # --scale-factor Scale factor for the benchmark (e.g., 1.0, 10.0) # --remote-storage Remote storage URL (e.g., s3://bucket/path/) -# If provided, runs in remote mode (no lance support). +# If provided, runs in remote mode (no lance/clickhouse support). # --benchmark-id Benchmark ID for error messages (e.g., tpch-s3) set -Eeu -o pipefail @@ -78,6 +78,7 @@ fi df_formats=$(echo "$targets" | tr ',' '\n' | (grep '^datafusion:' | grep -v ':lance$' || true) | sed 's/datafusion://' | tr '\n' ',' | sed 's/,$//') ddb_formats=$(echo "$targets" | tr ',' '\n' | (grep '^duckdb:' || true) | sed 's/duckdb://' | tr '\n' ',' | sed 's/,$//') has_lance=$(echo "$targets" | grep -q 'datafusion:lance' && echo "true" || echo "false") +has_clickhouse=$(echo "$targets" | grep -q '^clickhouse:' && echo "true" || echo "false") # Build options string. opts="" @@ -127,3 +128,14 @@ if ! $is_remote && [[ "$has_lance" == "true" ]] && [[ -f "target/release_debug/l cat lance-results.json >> results.json fi + +# ClickHouse-bench only runs for local benchmarks (clickhouse-local reads local files). +if ! 
$is_remote && [[ "$has_clickhouse" == "true" ]] && [[ -f "target/release_debug/clickhouse-bench" ]]; then + # shellcheck disable=SC2086 + target/release_debug/clickhouse-bench \ + -d gh-json \ + $opts \ + -o ch-results.json + + cat ch-results.json >> results.json +fi diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 68f97854045..5507d268e8a 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -110,7 +110,7 @@ jobs: "id": "clickbench-nvme", "subcommand": "clickbench", "name": "Clickbench on NVME", - "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb", + "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet", "build_lance": true }, { diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 030953c100b..ecdcc069096 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -21,7 +21,7 @@ on: "id": "clickbench-nvme", "subcommand": "clickbench", "name": "Clickbench on NVME", - "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb" + "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet" }, { "id": "tpch-nvme", @@ -136,6 +136,9 @@ jobs: if [ "${{ matrix.build_lance }}" = "true" ]; then packages="$packages --bin lance-bench" fi + if echo "${{ matrix.targets }}" | grep -q 'clickhouse:'; then + packages="$packages --bin clickhouse-bench" + fi cargo build $packages --profile release_debug - name: Generate data diff --git a/Cargo.lock b/Cargo.lock index 091dff9418d..95fe9ecf59b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1582,6 +1582,18 @@ 
version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +[[package]] +name = "clickhouse-bench" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "reqwest", + "tokio", + "tracing", + "vortex-bench", +] + [[package]] name = "cmake" version = "0.1.57" diff --git a/Cargo.toml b/Cargo.toml index ba9a0268b87..8b1186c5f00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ members = [ "encodings/zstd", "encodings/bytebool", # Benchmarks + "benchmarks/clickhouse-bench", "benchmarks/lance-bench", "benchmarks/compress-bench", "benchmarks/datafusion-bench", diff --git a/benchmarks/clickhouse-bench/Cargo.toml b/benchmarks/clickhouse-bench/Cargo.toml new file mode 100644 index 00000000000..789cce5a69f --- /dev/null +++ b/benchmarks/clickhouse-bench/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "clickhouse-bench" +description = "ClickHouse (clickhouse-local) benchmark runner for Vortex" +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true +publish = false + +[dependencies] +anyhow = { workspace = true } +clap = { workspace = true, features = ["derive"] } +tokio = { workspace = true, features = ["full"] } +tracing = { workspace = true } +vortex-bench = { workspace = true } + +[build-dependencies] +reqwest = { workspace = true, features = ["blocking"] } + +[lints] +workspace = true diff --git a/benchmarks/clickhouse-bench/build.rs b/benchmarks/clickhouse-bench/build.rs new file mode 100644 index 00000000000..917d248f9d8 --- /dev/null +++ b/benchmarks/clickhouse-bench/build.rs @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Build script that downloads a full ClickHouse binary (with Parquet support) +//! 
into the target directory. The binary path is exported via +//! `cargo:rustc-env=CLICKHOUSE_BINARY=...` so that `lib.rs` can locate it at runtime +//! via `env!("CLICKHOUSE_BINARY")` without any user-installed dependency. +//! +//! The approach mirrors `vortex-duckdb/build.rs` which auto-downloads a DuckDB dylib. +//! +//! Resolution order: +//! 1. `CLICKHOUSE_LOCAL` env var — use as-is (skip download). +//! 2. Download from `builds.clickhouse.com` (official master builds) into +//! `target/clickhouse-local/clickhouse`. +//! +//! We use the official master builds because macOS binaries are only available +//! from `builds.clickhouse.com`, not from the tgz/stable package repos. + +#![allow(clippy::unwrap_used)] +#![allow(clippy::expect_used)] +#![allow(clippy::panic)] + +use std::env; +use std::fs; +use std::os::unix::fs::PermissionsExt; +use std::path::PathBuf; + +/// Returns the download URL for the clickhouse binary based on the compilation target. +fn download_url() -> Result> { + let target = env::var("TARGET")?; + let dir = match target.as_str() { + "x86_64-apple-darwin" => "macos", + "aarch64-apple-darwin" => "macos-aarch64", + "x86_64-unknown-linux-gnu" => "amd64", + "aarch64-unknown-linux-gnu" => "aarch64", + other => return Err(format!("Unsupported target for clickhouse download: {other}").into()), + }; + Ok(format!( + "https://builds.clickhouse.com/master/{dir}/clickhouse" + )) +} + +/// Get the base target directory for ClickHouse artifacts. +fn target_dir() -> PathBuf { + let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); + manifest_dir.parent().unwrap().parent().unwrap().join("target") +} + +fn main() { + println!("cargo:rerun-if-env-changed=CLICKHOUSE_LOCAL"); + + // If the user explicitly provides a binary path, just export it. 
+ if let Ok(path) = env::var("CLICKHOUSE_LOCAL") { + println!("cargo:rustc-env=CLICKHOUSE_BINARY={path}"); + return; + } + + let ch_dir = target_dir().join("clickhouse-local"); + let binary_path = ch_dir.join("clickhouse"); + + // If the binary already exists (and is executable), skip download. + if binary_path.exists() { + println!("cargo:rustc-env=CLICKHOUSE_BINARY={}", binary_path.display()); + return; + } + + // Download the full ClickHouse binary. + let url = download_url().expect("Failed to determine clickhouse download URL"); + println!("cargo:warning=Downloading ClickHouse binary from {url} (this may take a minute)..."); + + fs::create_dir_all(&ch_dir).expect("Failed to create clickhouse-local directory"); + + let timeout_secs: u64 = env::var("CARGO_HTTP_TIMEOUT") + .or_else(|_| env::var("HTTP_TIMEOUT")) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(300); // 5 minute timeout for ~160MB download + + let client = reqwest::blocking::Client::builder() + .timeout(std::time::Duration::from_secs(timeout_secs)) + .build() + .expect("Failed to create HTTP client"); + + let response = client + .get(&url) + .send() + .expect("Failed to download ClickHouse binary"); + + assert!( + response.status().is_success(), + "Failed to download ClickHouse binary: HTTP {}", + response.status() + ); + + let bytes = response + .bytes() + .expect("Failed to read ClickHouse binary response body"); + + // Write to a temporary file first, then rename (atomic on same filesystem). + let tmp_path = ch_dir.join("clickhouse.tmp"); + fs::write(&tmp_path, &bytes).expect("Failed to write ClickHouse binary"); + + // Make it executable (0o755). 
+ let mut perms = fs::metadata(&tmp_path) + .expect("Failed to read tmp binary metadata") + .permissions(); + perms.set_mode(0o755); + fs::set_permissions(&tmp_path, perms).expect("Failed to set executable permissions"); + + fs::rename(&tmp_path, &binary_path).expect("Failed to rename ClickHouse binary into place"); + + println!("cargo:rustc-env=CLICKHOUSE_BINARY={}", binary_path.display()); + println!( + "cargo:warning=ClickHouse binary downloaded to {}", + binary_path.display() + ); +} diff --git a/benchmarks/clickhouse-bench/src/lib.rs b/benchmarks/clickhouse-bench/src/lib.rs new file mode 100644 index 00000000000..6f622b9c3a1 --- /dev/null +++ b/benchmarks/clickhouse-bench/src/lib.rs @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! ClickHouse Local context for benchmarks. +//! +//! Uses `clickhouse-local` via `std::process::Command` to execute SQL queries +//! against Parquet files on disk. +//! +//! The ClickHouse binary is **automatically downloaded** at build time by `build.rs` +//! (similar to how `vortex-duckdb/build.rs` downloads the DuckDB dynamic library). +//! No manual installation is required. +//! +//! ## Scan API Evaluation for ClickHouse Integration +//! +//! Per @gatesn's request in Discussion #6425, we evaluated whether the Vortex Scan API +//! (`vortex-scan/src/api.rs`) can support a good ClickHouse integration. +//! +//! ### Mapping +//! +//! The Scan API's four-layer abstraction maps naturally to ClickHouse: +//! +//! | Scan API | ClickHouse mapping | +//! |---|---| +//! | `DataSource` | Table metadata + connection config (`Send + Sync`, shareable) | +//! | `ScanRequest.projection` | `SELECT` column/expression pushdown (needs `Expression` → SQL converter) | +//! | `ScanRequest.filter` | `WHERE` clause pushdown (similar to `vortex-datafusion/convert/exprs.rs`) | +//! | `ScanRequest.limit` | `LIMIT N` pushdown (trivial) | +//! 
| `DataSourceScan` | Query planning + partition discovery (`system.parts` or file-level) | +//! | `Split` | Per-partition query execution unit | +//! | `Split::execute()` | Executes partition query, streams results as `SendableArrayStream` | +//! +//! ### Potential API Gaps +//! +//! 1. **No engine capability negotiation** — `DataSource` cannot declare which expression types +//! it supports for pushdown. Suggest adding `capabilities()` method. +//! 2. **`Split::execute()` is sync** — ClickHouse queries are inherently async (network I/O). +//! The pattern used by `LayoutReaderDataSource` (pre-compute `BoxFuture` in `scan()`) works +//! but should be documented as the recommended approach. +//! 3. **No column statistics API** — only `row_count_estimate()` exists. ClickHouse has rich +//! column stats (min/max/NDV) that could enable better query planning. +//! 4. **No transaction/snapshot semantics** — could lead to inconsistent reads across splits +//! on ClickHouse replicas. +//! +//! ### Conclusion +//! +//! The Scan API is a reasonable fit. None of the gaps are blockers. The recommended integration +//! order is: +//! 1. This PR: ClickBench baseline with `clickhouse-local` CLI (performance reference) +//! 2. `vortex-clickhouse` crate with type conversion (DType ↔ ClickHouse types) +//! 3. `ClickHouseDataSource` implementing `DataSource` trait (basic scan, no pushdown) +//! 4. Filter pushdown (`Expression` → ClickHouse WHERE clause) +//! 5. Projection pushdown and performance optimization + +use std::io::Write; +use std::path::PathBuf; +use std::process::Command; +use std::process::Stdio; +use std::time::Duration; +use std::time::Instant; + +use anyhow::Context; +use anyhow::Result; +use tracing::trace; +use vortex_bench::Benchmark; +use vortex_bench::Format; + +/// Path to the ClickHouse binary, set by build.rs at compile time. +const CLICKHOUSE_BINARY: &str = env!("CLICKHOUSE_BINARY"); + +/// A client that wraps `clickhouse-local` for running SQL benchmarks. 
+pub struct ClickHouseClient { + /// The path to the `clickhouse` binary. + binary: PathBuf, + /// SQL statements to run before each query (CREATE VIEW statements). + setup_sql: Vec, +} + +impl ClickHouseClient { + /// Create a new client. Only Parquet format is supported. + pub fn new(benchmark: &dyn Benchmark, format: Format) -> Result { + if format != Format::Parquet { + anyhow::bail!("clickhouse-bench only supports Parquet format, got {format}"); + } + + let binary = PathBuf::from(CLICKHOUSE_BINARY); + anyhow::ensure!( + binary.exists(), + "ClickHouse binary not found at '{}'. \ + This should have been downloaded by build.rs. Try `cargo clean -p clickhouse-bench`.", + binary.display() + ); + + tracing::info!(binary = %binary.display(), "Using clickhouse-local"); + + let mut client = Self { + binary, + setup_sql: Vec::new(), + }; + client.register_tables(benchmark, format)?; + Ok(client) + } + + /// Generate `CREATE VIEW ... AS SELECT * FROM file(...)` statements. + /// + /// We use a VIEW over the `file()` table function rather than `CREATE TABLE ... ENGINE = File()` + /// because the `file()` function handles glob patterns (e.g., `*.parquet`) more reliably across + /// ClickHouse versions. + fn register_tables(&mut self, benchmark: &dyn Benchmark, format: Format) -> Result<()> { + let data_url = benchmark.data_url(); + let base_dir = if data_url.scheme() == "file" { + data_url + .to_file_path() + .map_err(|_| anyhow::anyhow!("Invalid file URL: {data_url}"))? + } else { + anyhow::bail!("clickhouse-bench only supports local file:// data URLs"); + }; + + let format_dir = base_dir.join(format.name()); + if !format_dir.exists() { + anyhow::bail!( + "Data directory does not exist: {}. 
Run data generation first.", + format_dir.display() + ); + } + + for table_spec in benchmark.table_specs() { + let name = table_spec.name; + let pattern = benchmark + .pattern(name, format) + .map(|p| p.to_string()) + .unwrap_or_else(|| format!("*.{}", format.ext())); + + let data_path = format!("{}/{}", format_dir.display(), pattern); + + tracing::info!( + table = name, + path = %data_path, + "Registering ClickHouse table" + ); + + let create_sql = format!( + "CREATE VIEW IF NOT EXISTS {name} AS \ + SELECT * FROM file('{data_path}', Parquet);" + ); + self.setup_sql.push(create_sql); + } + + Ok(()) + } + + /// Execute a SQL query via `clickhouse-local`, returning `(row_count, timing)`. + /// + /// The approach: + /// 1. Prepend all CREATE VIEW statements + /// 2. Append the benchmark query + /// 3. Pipe the combined SQL into `clickhouse local` via stdin + /// 4. Parse stdout to count result rows + pub fn execute_query(&self, query: &str) -> Result<(usize, Option)> { + trace!("execute clickhouse query: {query}"); + + // Build the full SQL: setup views + the actual query + let mut full_sql = String::new(); + for stmt in &self.setup_sql { + full_sql.push_str(stmt); + full_sql.push('\n'); + } + full_sql.push_str(query); + // Ensure we have a trailing semicolon + if !query.trim_end().ends_with(';') { + full_sql.push(';'); + } + + let time_instant = Instant::now(); + + // The downloaded binary is the multi-tool `clickhouse` binary, + // so we always invoke it as `clickhouse local`. 
+ let mut child = Command::new(&self.binary) + .args(["local", "--format", "TabSeparated"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .context("Failed to spawn clickhouse-local")?; + + // Write SQL to stdin + { + let stdin = child + .stdin + .as_mut() + .context("Failed to open clickhouse-local stdin")?; + stdin + .write_all(full_sql.as_bytes()) + .context("Failed to write SQL to clickhouse-local stdin")?; + } + + let output = child + .wait_with_output() + .context("Failed to wait for clickhouse-local")?; + + let query_time = time_instant.elapsed(); + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!( + "clickhouse-local failed (exit {}): {stderr}", + output.status.code().unwrap_or(-1) + ); + } + + // Count non-empty lines in stdout as row count + let stdout = String::from_utf8_lossy(&output.stdout); + let row_count = stdout.lines().filter(|line| !line.is_empty()).count(); + + Ok((row_count, Some(query_time))) + } +} diff --git a/benchmarks/clickhouse-bench/src/main.rs b/benchmarks/clickhouse-bench/src/main.rs new file mode 100644 index 00000000000..03e8db27fd6 --- /dev/null +++ b/benchmarks/clickhouse-bench/src/main.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::path::PathBuf; + +use clap::Parser; +use clickhouse_bench::ClickHouseClient; +use tokio::runtime::Runtime; +use vortex_bench::Benchmark; +use vortex_bench::Engine; +use vortex_bench::Format; +use vortex_bench::Opt; +use vortex_bench::Opts; +use vortex_bench::clickbench::ClickBenchBenchmark; +use vortex_bench::clickbench::Flavor; +use vortex_bench::create_output_writer; +use vortex_bench::display::DisplayFormat; +use vortex_bench::runner::SqlBenchmarkRunner; +use vortex_bench::runner::filter_queries; +use vortex_bench::setup_logging_and_tracing; + +/// ClickHouse (clickhouse-local) benchmark runner. 
+/// +/// Runs ClickBench queries against Parquet data using clickhouse-local as a performance baseline. +/// This allows comparing ClickHouse's native Parquet reading performance against other engines +/// (DuckDB, DataFusion) on the same hardware and dataset. +#[derive(Parser)] +struct Args { + #[arg(short, long, default_value_t = 5)] + iterations: usize, + + #[arg(short, long)] + verbose: bool, + + #[arg(long)] + tracing: bool, + + #[arg(short, long, default_value_t, value_enum)] + display_format: DisplayFormat, + + #[arg(short, long, value_delimiter = ',')] + queries: Option>, + + #[arg(short, long, value_delimiter = ',')] + exclude_queries: Option>, + + #[arg(short)] + output_path: Option, + + #[arg(long, default_value_t = false)] + track_memory: bool, + + #[arg(long, default_value_t = false)] + hide_progress_bar: bool, + + #[arg(long = "opt", value_delimiter = ',', value_parser = clap::value_parser!(Opt))] + options: Vec, +} + +fn main() -> anyhow::Result<()> { + let args = Args::parse(); + let opts = Opts::from(args.options); + + setup_logging_and_tracing(args.verbose, args.tracing)?; + + let flavor = opts.get_as::("flavor").unwrap_or_default(); + let remote_data_dir = opts.get_as::("remote-data-dir"); + let benchmark = + ClickBenchBenchmark::new(flavor, None, remote_data_dir)?.with_engine(Engine::ClickHouse); + + let filtered_queries = filter_queries( + benchmark.queries()?, + args.queries.as_ref(), + args.exclude_queries.as_ref(), + ); + + // Generate base Parquet data if needed. 
+ if benchmark.data_url().scheme() == "file" { + let runtime = Runtime::new()?; + runtime.block_on(async { benchmark.generate_base_data().await })?; + } + + let formats = vec![Format::Parquet]; + + let mut runner = SqlBenchmarkRunner::new( + &benchmark, + Engine::ClickHouse, + formats, + args.track_memory, + args.hide_progress_bar, + )?; + + runner.run_all( + &filtered_queries, + args.iterations, + |format| ClickHouseClient::new(&benchmark, format), + |ctx, _query_idx, _format, query| ctx.execute_query(query), + )?; + + let benchmark_id = format!("clickhouse-{}", benchmark.dataset_name()); + let writer = create_output_writer(&args.display_format, args.output_path, &benchmark_id)?; + runner.export_to(&args.display_format, writer)?; + + Ok(()) +} diff --git a/vortex-bench/clickbench_clickhouse_queries.sql b/vortex-bench/clickbench_clickhouse_queries.sql new file mode 100644 index 00000000000..31f65fc898d --- /dev/null +++ b/vortex-bench/clickbench_clickhouse_queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; +SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; +SELECT AVG(UserID) FROM hits; +SELECT COUNT(DISTINCT UserID) FROM hits; +SELECT COUNT(DISTINCT SearchPhrase) FROM hits; +SELECT MIN(EventDate), MAX(EventDate) FROM hits; +SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; +SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT 
SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; +SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; +SELECT UserID FROM hits WHERE UserID = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; +SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; +SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; +SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k 
HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), 
SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; +SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; +SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 
OFFSET 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; +SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; +SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/vortex-bench/src/clickbench/benchmark.rs b/vortex-bench/src/clickbench/benchmark.rs index 5e14cbcf40e..4f4a143b92a 100644 --- a/vortex-bench/src/clickbench/benchmark.rs +++ b/vortex-bench/src/clickbench/benchmark.rs @@ -4,6 +4,7 @@ use std::env; use std::fs; use std::path::Path; +use std::path::PathBuf; use anyhow::Result; use reqwest::Client; @@ -12,6 +13,7 @@ use vortex::error::VortexExpect; use crate::Benchmark; use crate::BenchmarkDataset; +use crate::Engine; use crate::IdempotentPath; use crate::TableSpec; use crate::clickbench::*; @@ -21,6 +23,8 @@ pub struct ClickBenchBenchmark { pub flavor: Flavor, pub queries_file: Option, pub data_url: 
Url, + /// Override the engine to select engine-specific query files. + pub engine: Option, } impl ClickBenchBenchmark { @@ -34,9 +38,30 @@ impl ClickBenchBenchmark { flavor, queries_file, data_url: url, + engine: None, }) } + /// Set the engine to select engine-specific query files. + pub fn with_engine(mut self, engine: Engine) -> Self { + self.engine = Some(engine); + self + } + + /// Returns the path to the queries file for the given engine. + fn queries_file_path(&self) -> PathBuf { + if let Some(file) = &self.queries_file { + return file.into(); + } + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + match self.engine { + Some(Engine::ClickHouse) => { + manifest_dir.join("clickbench_clickhouse_queries.sql") + } + _ => manifest_dir.join("clickbench_queries.sql"), + } + } + fn create_data_url(remote_data_dir: &Option, flavor: Flavor) -> Result { match remote_data_dir { None => { @@ -69,10 +94,7 @@ impl ClickBenchBenchmark { #[async_trait::async_trait] impl Benchmark for ClickBenchBenchmark { fn queries(&self) -> Result> { - let queries_filepath = match &self.queries_file { - Some(file) => file.into(), - None => Path::new(env!("CARGO_MANIFEST_DIR")).join("clickbench_queries.sql"), - }; + let queries_filepath = self.queries_file_path(); Ok(fs::read_to_string(queries_filepath)? 
.split(';') diff --git a/vortex-bench/src/lib.rs b/vortex-bench/src/lib.rs index 6dad0f0f6a1..8be4c6bcea8 100644 --- a/vortex-bench/src/lib.rs +++ b/vortex-bench/src/lib.rs @@ -206,6 +206,9 @@ pub enum Engine { #[clap(name = "duckdb")] #[serde(rename = "duckdb")] DuckDB, + #[clap(name = "clickhouse")] + #[serde(rename = "clickhouse")] + ClickHouse, } impl Display for Engine { @@ -213,6 +216,7 @@ impl Display for Engine { match self { Engine::DataFusion => write!(f, "datafusion"), Engine::DuckDB => write!(f, "duckdb"), + Engine::ClickHouse => write!(f, "clickhouse"), Engine::Vortex => write!(f, "vortex"), Engine::Arrow => write!(f, "arrow"), } From 38d2fe7c6003d41f15091566f938f1ac60f278f6 Mon Sep 17 00:00:00 2001 From: fastio Date: Tue, 3 Mar 2026 10:16:59 +0800 Subject: [PATCH 2/7] bench(clickbench): tighten ClickHouse query normalization and URL handling Signed-off-by: fastio --- .../clickbench_clickhouse_queries.sql | 43 --------- vortex-bench/src/clickbench/benchmark.rs | 93 ++++++++++++++++--- 2 files changed, 78 insertions(+), 58 deletions(-) delete mode 100644 vortex-bench/clickbench_clickhouse_queries.sql diff --git a/vortex-bench/clickbench_clickhouse_queries.sql b/vortex-bench/clickbench_clickhouse_queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/vortex-bench/clickbench_clickhouse_queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT 
UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE 
SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), 
SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND 
EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/vortex-bench/src/clickbench/benchmark.rs b/vortex-bench/src/clickbench/benchmark.rs index 4f4a143b92a..cece28c6b6b 100644 --- a/vortex-bench/src/clickbench/benchmark.rs +++ b/vortex-bench/src/clickbench/benchmark.rs @@ -1,15 +1,12 @@ // SPDX-License-Identifier: 
Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::env; use std::fs; -use std::path::Path; use std::path::PathBuf; use anyhow::Result; use reqwest::Client; use url::Url; -use vortex::error::VortexExpect; use crate::Benchmark; use crate::BenchmarkDataset; @@ -48,28 +45,36 @@ impl ClickBenchBenchmark { self } - /// Returns the path to the queries file for the given engine. + /// Returns the path to the queries file. fn queries_file_path(&self) -> PathBuf { if let Some(file) = &self.queries_file { return file.into(); } - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - match self.engine { - Some(Engine::ClickHouse) => { - manifest_dir.join("clickbench_clickhouse_queries.sql") - } - _ => manifest_dir.join("clickbench_queries.sql"), + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + manifest_dir.join("clickbench_queries.sql") + } + + /// Returns true if the engine requires unquoted column names. + fn uses_unquoted_identifiers(&self) -> bool { + matches!(self.engine, Some(Engine::ClickHouse)) + } + + /// Strips double quotes only from simple SQL identifiers for engines like + /// ClickHouse that don't require quoted column names. + fn normalize_query(&self, query: &str) -> String { + if !self.uses_unquoted_identifiers() { + return query.to_string(); } + + strip_simple_identifier_quotes(query) } fn create_data_url(remote_data_dir: &Option, flavor: Flavor) -> Result { match remote_data_dir { None => { let basepath = format!("clickbench_{flavor}").to_data_path(); - Ok(Url::parse(&format!( - "file:{}/", - basepath.to_str().vortex_expect("path should be utf8") - ))?) 
+ Url::from_directory_path(basepath) + .map_err(|_| anyhow::anyhow!("Failed to convert ClickBench data path to URL")) } Some(remote_data_dir) => { if !remote_data_dir.ends_with("/") { @@ -91,6 +96,64 @@ impl ClickBenchBenchmark { } } +fn strip_simple_identifier_quotes(query: &str) -> String { + let bytes = query.as_bytes(); + let mut out = String::with_capacity(query.len()); + let mut i = 0; + + while i < query.len() { + let rel = match query[i..].find('"') { + Some(pos) => pos, + None => { + out.push_str(&query[i..]); + break; + } + }; + + let start = i + rel; + out.push_str(&query[i..start]); + + let mut end = start + 1; + while end < bytes.len() { + if bytes[end] == b'"' { + if end + 1 < bytes.len() && bytes[end + 1] == b'"' { + end += 2; + } else { + break; + } + } else { + end += 1; + } + } + + if end >= bytes.len() { + out.push_str(&query[start..]); + break; + } + + let inner = &query[start + 1..end]; + if is_simple_identifier(inner) { + out.push_str(inner); + } else { + out.push_str(&query[start..=end]); + } + + i = end + 1; + } + + out +} + +fn is_simple_identifier(s: &str) -> bool { + let mut chars = s.chars(); + let Some(first) = chars.next() else { + return false; + }; + + (first.is_ascii_alphabetic() || first == '_') + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + #[async_trait::async_trait] impl Benchmark for ClickBenchBenchmark { fn queries(&self) -> Result> { @@ -100,7 +163,7 @@ impl Benchmark for ClickBenchBenchmark { .split(';') .map(|s| s.trim()) .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) + .map(|s| self.normalize_query(s)) .enumerate() .collect()) } From 64834ed14862c5a63a9b5d6b518d8b3513c8a465 Mon Sep 17 00:00:00 2001 From: fastio Date: Tue, 3 Mar 2026 21:10:34 +0800 Subject: [PATCH 3/7] bench(clickhouse): remove build-time binary download, resolve clickhouse from PATH - Remove reqwest-based binary download from build.rs - Resolve clickhouse binary via CLICKHOUSE_BINARY env var or $PATH at runtime - Add CI step to 
install clickhouse before building when needed - Fail with clear error message if binary is not found locally Signed-off-by: fastio --- .github/workflows/sql-benchmarks.yml | 7 + Cargo.lock | 2712 +++++++++++------------- benchmarks/clickhouse-bench/Cargo.toml | 3 - benchmarks/clickhouse-bench/build.rs | 115 +- benchmarks/clickhouse-bench/src/lib.rs | 106 +- 5 files changed, 1311 insertions(+), 1632 deletions(-) diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index ecdcc069096..31175904275 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -127,6 +127,13 @@ jobs: - uses: ./.github/actions/system-info + - name: Install ClickHouse + if: contains(matrix.targets, 'clickhouse:') + run: | + curl https://clickhouse.com/ | sh + sudo ./clickhouse install + echo "CLICKHOUSE_BINARY=$(which clickhouse)" >> $GITHUB_ENV + - name: Build binaries shell: bash env: diff --git a/Cargo.lock b/Cargo.lock index 95fe9ecf59b..96ddd27fc9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,7 +125,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -136,14 +136,43 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] name = "anyhow" -version = "1.0.101" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "apache-avro" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" +checksum = 
"36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" +dependencies = [ + "bigdecimal", + "bon", + "bzip2", + "crc32fast", + "digest", + "liblzma", + "log", + "miniz_oxide", + "num-bigint", + "quad-rand", + "rand 0.9.2", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum 0.27.2", + "strum_macros 0.27.2", + "thiserror 2.0.18", + "uuid", + "zstd", +] [[package]] name = "approx" @@ -154,6 +183,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + [[package]] name = "arbitrary" version = "1.4.2" @@ -165,9 +203,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" +checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" dependencies = [ "rustversion", ] @@ -192,9 +230,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" +checksum = "e4754a624e5ae42081f464514be454b39711daae0458906dacde5f4c632f33a8" dependencies = [ "arrow-arith", "arrow-array", @@ -213,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" +checksum = "f7b3141e0ec5145a22d8694ea8b6d6f69305971c4fa1c1a13ef0195aef2d678b" dependencies = [ "arrow-array", "arrow-buffer", @@ -227,9 +265,9 @@ dependencies = [ [[package]] name = 
"arrow-array" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" +checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -246,9 +284,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" +checksum = "c697ddca96183182f35b3a18e50b9110b11e916d7b7799cbfd4d34662f2c56c2" dependencies = [ "bytes", "half", @@ -258,9 +296,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" +checksum = "646bbb821e86fd57189c10b4fcdaa941deaf4181924917b0daa92735baa6ada5" dependencies = [ "arrow-array", "arrow-buffer", @@ -280,9 +318,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" +checksum = "8da746f4180004e3ce7b83c977daf6394d768332349d3d913998b10a120b790a" dependencies = [ "arrow-array", "arrow-cast", @@ -295,9 +333,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" +checksum = "1fdd994a9d28e6365aa78e15da3f3950c0fdcea6b963a12fa1c391afb637b304" dependencies = [ "arrow-buffer", "arrow-schema", @@ -308,9 +346,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.2.0" +version = "57.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" +checksum = "abf7df950701ab528bf7c0cf7eeadc0445d03ef5d6ffc151eaae6b38a58feff1" dependencies = [ "arrow-array", "arrow-buffer", @@ -324,9 +362,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" +checksum = "0ff8357658bedc49792b13e2e862b80df908171275f8e6e075c460da5ee4bf86" dependencies = [ "arrow-array", "arrow-buffer", @@ -348,9 +386,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" +checksum = "f7d8f1870e03d4cbed632959498bcc84083b5a24bded52905ae1695bd29da45b" dependencies = [ "arrow-array", "arrow-buffer", @@ -361,9 +399,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" +checksum = "18228633bad92bff92a95746bbeb16e5fc318e8382b75619dec26db79e4de4c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -374,20 +412,20 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" +checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" +checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -399,9 +437,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" +checksum = "85e968097061b3c0e9fe3079cf2e703e487890700546b5b0647f60fca1b5a8d8" dependencies = [ "arrow-array", "arrow-buffer", @@ -426,24 +464,11 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "async-compat" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ba85bc55464dcbf728b56d97e119d673f4cf9062be330a9a26f3acf504a590" -dependencies = [ - "futures-core", - "futures-io", - "once_cell", - "pin-project-lite", - "tokio", -] - [[package]] name = "async-compression" -version = "0.4.37" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" +checksum = "7d67d43201f4d20c78bcda740c142ca52482d81da80681533d33bf3f0596c8e2" dependencies = [ "compression-codecs", "compression-core", @@ -453,9 +478,9 @@ dependencies = [ [[package]] name = "async-executor" -version = "1.13.3" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" +checksum = "c96bf972d85afc50bf5ab8fe2d54d1586b4e0b46c97c50a0c9e71e2f7bcd812a" dependencies = [ "async-task", "concurrent-queue", @@ -489,7 +514,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix 1.1.3", + "rustix 1.1.4", "slab", "windows-sys 0.61.2", ] @@ -531,7 +556,7 @@ dependencies = [ "cfg-if", "event-listener", "futures-lite", - "rustix 1.1.3", + "rustix 1.1.4", ] [[package]] @@ -542,7 +567,7 @@ checksum = 
"3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -557,7 +582,7 @@ dependencies = [ "cfg-if", "futures-core", "futures-io", - "rustix 1.1.3", + "rustix 1.1.4", "signal-hook-registry", "slab", "windows-sys 0.61.2", @@ -582,7 +607,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -599,7 +624,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -641,412 +666,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" -[[package]] -name = "aws-config" -version = "1.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "hex", - "http 1.4.0", - "ring", - "time", - "tokio", - "tracing", - "url", - "zeroize", -] - -[[package]] -name = "aws-credential-types" -version = "1.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "zeroize", -] - -[[package]] -name = "aws-lc-rs" -version = "1.15.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256" 
-dependencies = [ - "aws-lc-sys", - "zeroize", -] - -[[package]] -name = "aws-lc-sys" -version = "0.37.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c34dda4df7017c8db52132f0f8a2e0f8161649d15723ed63fc00c82d0f2081a" -dependencies = [ - "cc", - "cmake", - "dunce", - "fs_extra", -] - -[[package]] -name = "aws-runtime" -version = "1.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "959dab27ce613e6c9658eb3621064d0e2027e5f2acb65bc526a43577facea557" -dependencies = [ - "aws-credential-types", - "aws-sigv4", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "tracing", - "uuid", -] - -[[package]] -name = "aws-sdk-sso" -version = "1.92.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7d63bd2bdeeb49aa3f9b00c15e18583503b778b2e792fc06284d54e7d5b6566" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-ssooidc" -version = "1.94.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532d93574bf731f311bafb761366f9ece345a0416dbcc273d81d6d1a1205239b" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-types", - "bytes", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sdk-sts" -version = "1.96.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "357e9a029c7524db6a0099cd77fbd5da165540339e7296cca603531bc783b56c" -dependencies = [ - "aws-credential-types", - "aws-runtime", - "aws-smithy-async", - "aws-smithy-http 0.62.6", - "aws-smithy-json", - "aws-smithy-observability", - "aws-smithy-query", - "aws-smithy-runtime", - "aws-smithy-runtime-api", - "aws-smithy-types", - "aws-smithy-xml", - "aws-types", - "fastrand", - "http 0.2.12", - "regex-lite", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "1.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" -dependencies = [ - "aws-credential-types", - "aws-smithy-http 0.62.6", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "form_urlencoded", - "hex", - "hmac", - "http 0.2.12", - "http 1.4.0", - "percent-encoding", - "sha2", - "time", - "tracing", -] - -[[package]] -name = "aws-smithy-async" -version = "1.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52eec3db979d18cb807fc1070961cc51d87d069abe9ab57917769687368a8c6c" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "aws-smithy-http" -version = "0.62.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "bytes-utils", - "futures-core", - "futures-util", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - -[[package]] -name = "aws-smithy-http" -version = "0.63.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630e67f2a31094ffa51b210ae030855cb8f3b7ee1329bdd8d085aaf61e8b97fc" -dependencies = [ - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - 
"bytes-utils", - "futures-core", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tracing", -] - -[[package]] -name = "aws-smithy-http-client" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12fb0abf49ff0cab20fd31ac1215ed7ce0ea92286ba09e2854b42ba5cabe7525" -dependencies = [ - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "h2", - "http 1.4.0", - "hyper", - "hyper-rustls", - "hyper-util", - "pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pki-types", - "tokio", - "tokio-rustls", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.61.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" -dependencies = [ - "aws-smithy-types", -] - -[[package]] -name = "aws-smithy-observability" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0a46543fbc94621080b3cf553eb4cbbdc41dd9780a30c4756400f0139440a1d" -dependencies = [ - "aws-smithy-runtime-api", -] - -[[package]] -name = "aws-smithy-query" -version = "0.60.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cebbddb6f3a5bd81553643e9c7daf3cc3dc5b0b5f398ac668630e8a84e6fff0" -dependencies = [ - "aws-smithy-types", - "urlencoding", -] - -[[package]] -name = "aws-smithy-runtime" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3df87c14f0127a0d77eb261c3bc45d5b4833e2a1f63583ebfb728e4852134ee" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http 0.63.3", - "aws-smithy-http-client", - "aws-smithy-observability", - "aws-smithy-runtime-api", - "aws-smithy-types", - "bytes", - "fastrand", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "pin-project-lite", - 
"pin-utils", - "tokio", - "tracing", -] - -[[package]] -name = "aws-smithy-runtime-api" -version = "1.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49952c52f7eebb72ce2a754d3866cc0f87b97d2a46146b79f80f3a93fb2b3716" -dependencies = [ - "aws-smithy-async", - "aws-smithy-types", - "bytes", - "http 0.2.12", - "http 1.4.0", - "pin-project-lite", - "tokio", - "tracing", - "zeroize", -] - -[[package]] -name = "aws-smithy-types" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3a26048eeab0ddeba4b4f9d51654c79af8c3b32357dc5f336cee85ab331c33" -dependencies = [ - "base64-simd", - "bytes", - "bytes-utils", - "http 0.2.12", - "http 1.4.0", - "http-body 0.4.6", - "http-body 1.0.1", - "http-body-util", - "itoa", - "num-integer", - "pin-project-lite", - "pin-utils", - "ryu", - "serde", - "time", -] - -[[package]] -name = "aws-smithy-xml" -version = "0.60.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" -dependencies = [ - "xmlparser", -] - -[[package]] -name = "aws-types" -version = "1.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" -dependencies = [ - "aws-credential-types", - "aws-smithy-async", - "aws-smithy-runtime-api", - "aws-smithy-types", - "rustc_version", - "tracing", -] - -[[package]] -name = "backon" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" -dependencies = [ - "fastrand", - "gloo-timers", - "tokio", -] - [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "base64-simd" -version = "0.8.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" -dependencies = [ - "outref", - "vsimd", -] - -[[package]] -name = "base64ct" -version = "1.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" - [[package]] name = "better_io" version = "0.2.0" @@ -1064,24 +689,7 @@ dependencies = [ "num-bigint", "num-integer", "num-traits", -] - -[[package]] -name = "bindgen" -version = "0.70.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" -dependencies = [ - "bitflags 2.10.0", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.114", + "serde", ] [[package]] @@ -1090,7 +698,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1099,9 +707,9 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash", "shlex", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1133,9 +741,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bitpacking" @@ -1190,15 +798,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "blocking" version = "1.6.2" @@ -1214,9 +813,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" dependencies = [ "bon-macros", "rustversion", @@ -1224,9 +823,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ "darling", "ident_case", @@ -1234,7 +833,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1257,7 +856,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1294,9 +893,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "byte-slice-cast" @@ -1311,7 +910,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c6d47a4e2961fb8721bcfc54feae6455f2f64e7054f9bc67e875f0e77f4c58d" dependencies = [ "rust_decimal", - "schemars", + "schemars 1.2.1", "serde", "utf8-width", ] @@ -1363,22 +962,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] -name = "bytes-utils" -version = "0.1.4" 
+name = "bzip2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ - "bytes", - "either", + "libbz2-rs-sys", ] [[package]] -name = "bzip2" -version = "0.6.1" +name = "camino" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +checksum = "e629a66d692cb9ff1a1c664e41771b3dcaf961985a9774c0eb0bd1b51cf60a48" dependencies = [ - "libbz2-rs-sys", + "serde_core", +] + +[[package]] +name = "cargo-platform" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87a0c0e6148f11f01f32650a2ea02d532b2ad4e81d8bd41e6e565b5adc5e6082" +dependencies = [ + "serde", + "serde_core", +] + +[[package]] +name = "cargo_metadata" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef987d17b0a113becdd19d3d0022d04d7ef41f9efe4f3fb63ac44ba61df3ade9" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror 2.0.18", ] [[package]] @@ -1396,15 +1018,6 @@ dependencies = [ "rustversion", ] -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cbindgen" version = "0.29.2" @@ -1419,16 +1032,16 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.114", + "syn 2.0.117", "tempfile", "toml", ] [[package]] name = "cc" -version = "1.2.55" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" +checksum = 
"aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "jobserver", @@ -1471,9 +1084,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.43" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", "js-sys", @@ -1543,9 +1156,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.57" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", "clap_derive", @@ -1553,9 +1166,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.57" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstream", "anstyle", @@ -1573,14 +1186,14 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "clickhouse-bench" @@ -1588,21 +1201,11 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "reqwest", "tokio", "tracing", "vortex-bench", ] -[[package]] -name = "cmake" -version = "0.1.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" -dependencies = [ - "cc", -] - [[package]] name = "codespan-reporting" version = "0.13.1" @@ -1611,7 +1214,7 @@ checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" dependencies = [ "serde", "termcolor", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -1683,7 +1286,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -1729,13 +1332,12 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", - "unicode-width", + "unicode-segmentation", + "unicode-width 0.2.2", ] [[package]] @@ -1768,7 +1370,6 @@ dependencies = [ "lance-bench", "parquet", "regex", - "serde", "tokio", "tracing", "vortex", @@ -1777,13 +1378,17 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" +checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" dependencies = [ + "bzip2", "compression-core", "flate2", + "liblzma", "memchr", + "zstd", + "zstd-safe", ] [[package]] @@ -1828,16 +1433,10 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width", + "unicode-width 0.2.2", "windows-sys 0.61.2", ] -[[package]] -name = "const-oid" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" - [[package]] name = "const-random" version = "0.1.18" @@ -1955,15 +1554,6 @@ version = "2.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -2032,13 +1622,13 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "crossterm_winapi", "derive_more", "document-features", "mio", "parking_lot", - "rustix 1.1.3", + "rustix 1.1.4", "signal-hook", "signal-hook-mio", "winapi", @@ -2112,11 +1702,11 @@ dependencies = [ [[package]] name = "custom-labels" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1c8a4320177bfad0514f5ee9525499030e2d48e6da0c2047da4a782190ce2" +checksum = "a750ea4bdb7dbf9584b5d5c668bfa3835f88275781a947b5ea0212945bbdd41f" dependencies = [ - "bindgen 0.70.1", + "bindgen", "cc", "libc", "pin-project-lite", @@ -2149,7 +1739,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2163,7 +1753,7 @@ dependencies = [ "indexmap", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2181,7 +1771,7 @@ dependencies = [ "indexmap", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2204,7 +1794,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2215,7 +1805,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -2292,6 +1882,7 @@ dependencies = [ "arrow-schema", "async-trait", 
"bytes", + "bzip2", "chrono", "datafusion-catalog 52.1.0", "datafusion-catalog-listing 52.1.0", @@ -2299,6 +1890,7 @@ dependencies = [ "datafusion-common-runtime 52.1.0", "datafusion-datasource 52.1.0", "datafusion-datasource-arrow 52.1.0", + "datafusion-datasource-avro", "datafusion-datasource-csv 52.1.0", "datafusion-datasource-json 52.1.0", "datafusion-datasource-parquet", @@ -2318,8 +1910,10 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", "datafusion-sql 52.1.0", + "flate2", "futures", "itertools 0.14.0", + "liblzma", "log", "object_store", "parking_lot", @@ -2331,6 +1925,7 @@ dependencies = [ "tokio", "url", "uuid", + "zstd", ] [[package]] @@ -2338,14 +1933,12 @@ name = "datafusion-bench" version = "0.1.0" dependencies = [ "anyhow", - "arrow-ipc", "clap", "custom-labels", "datafusion 52.1.0", "datafusion-common 52.1.0", "datafusion-physical-plan 52.1.0", "futures", - "get_dir", "itertools 0.14.0", "object_store", "opentelemetry", @@ -2354,6 +1947,7 @@ dependencies = [ "parking_lot", "tokio", "url", + "vortex", "vortex-bench", "vortex-cuda", "vortex-datafusion", @@ -2486,6 +2080,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash 0.8.12", + "apache-avro", "arrow", "arrow-ipc", "chrono", @@ -2497,6 +2092,7 @@ dependencies = [ "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", "web-time", @@ -2560,8 +2156,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow", + "async-compression", "async-trait", "bytes", + "bzip2", "chrono", "datafusion-common 52.1.0", "datafusion-common-runtime 52.1.0", @@ -2572,14 +2170,18 @@ dependencies = [ "datafusion-physical-expr-common 52.1.0", "datafusion-physical-plan 52.1.0", "datafusion-session 52.1.0", + "flate2", "futures", "glob", 
"itertools 0.14.0", + "liblzma", "log", "object_store", "rand 0.9.2", "tokio", + "tokio-util", "url", + "zstd", ] [[package]] @@ -2630,6 +2232,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "datafusion-datasource-avro" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" +dependencies = [ + "apache-avro", + "arrow", + "async-trait", + "bytes", + "datafusion-common 52.1.0", + "datafusion-datasource 52.1.0", + "datafusion-physical-expr-common 52.1.0", + "datafusion-physical-plan 52.1.0", + "datafusion-session 52.1.0", + "futures", + "num-traits", + "object_store", +] + [[package]] name = "datafusion-datasource-csv" version = "51.0.0" @@ -2843,6 +2465,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "paste", + "recursive", "serde_json", "sqlparser", ] @@ -2912,6 +2535,8 @@ dependencies = [ "arrow", "arrow-buffer", "base64", + "blake2", + "blake3", "chrono", "chrono-tz", "datafusion-common 52.1.0", @@ -2923,9 +2548,11 @@ dependencies = [ "hex", "itertools 0.14.0", "log", + "md-5", "num-traits", "rand 0.9.2", "regex", + "sha2", "unicode-segmentation", "uuid", ] @@ -3140,7 +2767,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc 51.0.0", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3151,7 +2778,7 @@ checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" dependencies = [ "datafusion-doc 52.1.0", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3188,6 +2815,7 @@ dependencies = [ "indexmap", "itertools 0.14.0", "log", + "recursive", "regex", "regex-syntax", ] @@ -3234,6 +2862,7 @@ dependencies = [ "parking_lot", "paste", "petgraph", + "recursive", "tokio", ] @@ -3332,6 +2961,7 @@ dependencies = [ "datafusion-physical-plan 52.1.0", "datafusion-pruning 52.1.0", "itertools 0.14.0", + "recursive", ] [[package]] @@ -3458,6 +3088,29 @@ 
dependencies = [ "parking_lot", ] +[[package]] +name = "datafusion-spark" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556c431f5f2259620c8223254c0ef57aa9a85c576d4da0166157260f71eb0e25" +dependencies = [ + "arrow", + "bigdecimal", + "chrono", + "crc32fast", + "datafusion-catalog 52.1.0", + "datafusion-common 52.1.0", + "datafusion-execution 52.1.0", + "datafusion-expr 52.1.0", + "datafusion-functions 52.1.0", + "datafusion-functions-nested 52.1.0", + "log", + "percent-encoding", + "rand 0.9.2", + "sha1", + "url", +] + [[package]] name = "datafusion-sql" version = "51.0.0" @@ -3488,10 +3141,58 @@ dependencies = [ "datafusion-expr 52.1.0", "indexmap", "log", + "recursive", "regex", "sqlparser", ] +[[package]] +name = "datafusion-sqllogictest" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d388fec80647198ae041d314dd7d9e2305207836ecec3ad48908eac6844cdef" +dependencies = [ + "arrow", + "async-trait", + "bigdecimal", + "clap", + "datafusion 52.1.0", + "datafusion-spark", + "datafusion-substrait", + "futures", + "half", + "indicatif", + "itertools 0.14.0", + "log", + "object_store", + "sqllogictest", + "sqlparser", + "tempfile", + "thiserror 2.0.18", + "tokio", +] + +[[package]] +name = "datafusion-substrait" +version = "52.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6042adacd0bd64e56c22f6a7f9ce0ce1793dd367c899d868179d029f110d9215" +dependencies = [ + "async-recursion", + "async-trait", + "chrono", + "datafusion 52.1.0", + "half", + "itertools 0.14.0", + "object_store", + "pbjson-types", + "prost 0.14.3", + "substrait", + "tokio", + "url", + "uuid", +] + [[package]] name = "deepsize" version = "0.2.0" @@ -3524,22 +3225,11 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5729f5117e208430e437df2f4843f5e5952997175992d1414f94c57d61e270b4" -[[package]] -name = "der" -version = "0.7.10" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" -dependencies = [ - "const-oid", - "pem-rfc7468", - "zeroize", -] - [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", "serde_core", @@ -3553,7 +3243,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3575,7 +3265,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3585,7 +3275,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", - "const-oid", "crypto-common", "subtle", ] @@ -3608,7 +3297,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -3619,7 +3308,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -3630,16 +3319,7 @@ checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", -] - -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", + "syn 2.0.117", ] [[package]] @@ -3659,9 +3339,9 @@ checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" [[package]] name = "dtype_dispatch" 
-version = "0.1.1" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3a5ccdfd6c5e7e2fea9c5cf256f2a08216047fab19c621c3da64e9ae4a1462d" +checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" [[package]] name = "duckdb-bench" @@ -3669,24 +3349,15 @@ version = "0.1.0" dependencies = [ "anyhow", "clap", - "custom-labels", - "get_dir", "similar", "tokio", "tracing", - "url", "vortex", "vortex-bench", "vortex-cuda", "vortex-duckdb", ] -[[package]] -name = "dunce" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" - [[package]] name = "dyn-clone" version = "1.0.20" @@ -3703,6 +3374,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "either" version = "1.15.0" @@ -3741,14 +3424,34 @@ checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] name = "env_filter" -version = "0.1.4" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" dependencies = [ "log", "regex", @@ -3762,9 +3465,9 @@ checksum = "c7f84e12ccf0a7ddc17a6c41c93326024c42920d7ee630d04950e6926645c0fe" [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ "anstream", "anstyle", @@ -3786,9 +3489,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] +[[package]] +name = "escape8259" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" + [[package]] name = "ethnum" version = "1.5.2" @@ -3960,15 +3669,15 @@ version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -4023,6 +3732,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fde052dbfc920003cfd2c8e2c6e6d4cc7c1091538c3a24226cec0665ab08c0" +dependencies = [ + "autocfg", +] + 
[[package]] name = "fs4" version = "0.8.4" @@ -4033,17 +3751,11 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "fsst" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f03a771ab914e207dd26bd2f12666839555ec8ecc7e1770e1ed6f9900d899a4" +checksum = "5f9e5c0b1c67a38cb92b41535d44623483beb9511592ae23a3bf42ddec758690" dependencies = [ "arrow-array", "rand 0.9.2", @@ -4072,9 +3784,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -4087,9 +3799,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -4097,15 +3809,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -4114,9 +3826,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -4133,26 +3845,26 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -4162,9 +3874,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = 
"389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -4174,7 +3886,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -4198,7 +3909,7 @@ dependencies = [ "libc", "log", "rustversion", - "windows-link 0.2.1", + "windows-link 0.1.3", "windows-result 0.4.1", ] @@ -4317,9 +4028,9 @@ dependencies = [ [[package]] name = "geographiclib-rs" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8f647bd562db28a15e0dce4a77d89e3a78f6f85943e782418ebdbb420ea3c4" +checksum = "c5a7f08910fd98737a6eda7568e7c5e645093e073328eeef49758cfe8b0489c7" dependencies = [ "libm", ] @@ -4368,35 +4079,51 @@ dependencies = [ ] [[package]] -name = "glob" -version = "0.3.3" +name = "getrandom" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] [[package]] -name = "gloo-timers" -version = "0.3.0" +name = "glob" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "goldenfile" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ef8d7e733be5a2b7b473a8bf6865d6dda7911ca010241f459439bac27df0013" +checksum = "4ce2238f730d493a06ef6746713fe0b56acecc88485892ae65c50d2db9bf977b" dependencies = [ "scopeguard", "similar-asserts", + "static_assertions", "tempfile", "yansi", ] +[[package]] +name = "gpu-scan-cli" 
+version = "0.1.0" +dependencies = [ + "futures", + "tokio", + "tracing", + "tracing-perfetto", + "tracing-subscriber", + "vortex", + "vortex-cuda", + "vortex-cuda-macros", +] + [[package]] name = "grid" version = "1.0.0" @@ -4414,7 +4141,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", + "http", "indexmap", "slab", "tokio", @@ -4451,6 +4178,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbag" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7040a10f52cba493ddb09926e15d10a9d8a28043708a405931fe4c6f19fac064" + [[package]] name = "hashbrown" version = "0.12.3" @@ -4529,32 +4262,12 @@ dependencies = [ "digest", ] -[[package]] -name = "home" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" -dependencies = [ - "windows-sys 0.61.2", -] - [[package]] name = "htmlescape" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.4.0" @@ -4565,17 +4278,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" -dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -4583,7 +4285,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - 
"http 1.4.0", + "http", ] [[package]] @@ -4594,8 +4296,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "pin-project-lite", ] @@ -4631,8 +4333,8 @@ dependencies = [ "futures-channel", "futures-core", "h2", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "httparse", "itoa", "pin-project-lite", @@ -4648,7 +4350,7 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.4.0", + "http", "hyper", "hyper-util", "rustls", @@ -4686,8 +4388,8 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "hyper", "ipnet", "libc", @@ -4858,6 +4560,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -4902,7 +4610,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -4913,18 +4621,20 @@ checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] name = "indicatif" -version = "0.18.3" +version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ "console 0.16.2", "futures-core", "portable-atomic", - "unicode-width", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] @@ -4944,7 
+4654,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "block-padding", "generic-array", ] @@ -4970,7 +4679,7 @@ dependencies = [ "indoc", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -4981,9 +4690,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "inventory" -version = "0.3.21" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" +checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" dependencies = [ "rustversion", ] @@ -5012,7 +4721,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -5083,9 +4792,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.19" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89a5b5e10d5a9ad6e5d1f4bd58225f655d6fe9767575a5e8ac5a6fe64e04495" +checksum = "819b44bc7c87d9117eb522f14d46e918add69ff12713c475946b0a29363ed1c2" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -5093,18 +4802,18 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] name = "jiff-static" -version = "0.2.19" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff7a39c8862fc1369215ccf0a8f12dd4598c7f6484704359f0351bd617034dbf" +checksum = "470252db18ecc35fd766c0891b1e3ec6cbbcd62507e85276c01bf75d8e94d4a1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -5158,9 +4867,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.85" +version = "0.3.91" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", @@ -5186,21 +4895,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "jsonwebtoken" -version = "9.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - [[package]] name = "kanal" version = "0.1.1" @@ -5230,9 +4924,9 @@ checksum = "bf36173d4167ed999940f804952e6b08197cae5ad5d572eb4db150ce8ad5d58f" [[package]] name = "lance" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47b685aca3f97ee02997c83ded16f59c747ccb69e74c8abbbae4aa3d22cf1301" +checksum = "2b7f07b905df393a5554eba19055c620f9ea25a3e40a013bda4bd8dc4ca66f01" dependencies = [ "arrow", "arrow-arith", @@ -5246,7 +4940,6 @@ dependencies = [ "async-recursion", "async-trait", "async_cell", - "aws-credential-types", "byteorder", "bytes", "chrono", @@ -5296,9 +4989,9 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf00c7537df524cc518a089f0d156a036d95ca3f5bc2bc1f0a9f9293e9b62ef" +checksum = "100e076cb81c8f0c24cd2881c706fc53e037c7d6e81eb320e929e265d157effb" dependencies = [ "arrow-array", "arrow-buffer", @@ -5324,7 +5017,6 @@ dependencies = [ "async-trait", "clap", "futures", - "get_dir", "lance", "lance-encoding", "parquet", @@ -5336,9 +5028,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"46752e4ac8fc5590a445e780b63a8800adc7a770bd74770a8dc66963778e4e77" +checksum = "588318d3d1ba0f97162fab39a323a0a49866bb35b32af42572c6b6a12296fa27" dependencies = [ "arrayref", "paste", @@ -5347,9 +5039,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d13d87d07305c6d4b4dc7780fb1107babf782a0e5b1dc7872e17ae1f8fd11ca" +checksum = "6fa01d1cf490ccfd3b8eaeee2781415d0419e6be8366040e57e43677abf2644e" dependencies = [ "arrow-array", "arrow-buffer", @@ -5386,9 +5078,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6451b5af876eaef8bec4b38a39dadac9d44621e1ecf85d0cdf6097a5d0aa8721" +checksum = "ef89a39e3284eef76f79e63f23de8881a0583ad6feb20ed39f47eadd847a2b88" dependencies = [ "arrow", "arrow-array", @@ -5418,9 +5110,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1736708dd7867dfbab8fcc930b21c96717c6c00be73b7d9a240336a4ed80375" +checksum = "fc2a60eef5c47e65d91e2ffa8e7e1629c52e7190c8b88a371a1a60601dc49371" dependencies = [ "arrow", "arrow-array", @@ -5438,9 +5130,9 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6b6ca4ff94833240d5ba4a94a742cba786d1949b3c3fa7e11d6f0050443432a" +checksum = "95ce4a6631308aa681b2671af8f2a845ff781f8d4e755a2a7ccd012379467094" dependencies = [ "arrow-arith", "arrow-array", @@ -5477,9 +5169,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fbe959bffe185543aed3cbeb14484f1aa2e55886034fdb1ea3d8cc9b70aad8" +checksum = 
"e2d4d82357cbfaa1a18494226c15b1cb3c8ed0b6c84b91146323c82047ede419" dependencies = [ "arrow-arith", "arrow-array", @@ -5511,9 +5203,9 @@ dependencies = [ [[package]] name = "lance-geo" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a52b0adabc953d457f336a784a3b37353a180e6a79905f544949746e0d4c6483" +checksum = "a7183fc870da62826f0f97df8007b634da053eb310157856efe1dc74f446951c" dependencies = [ "datafusion 51.0.0", "geo-traits", @@ -5527,9 +5219,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b67654bf86fd942dd2cf08294ee7e91053427cd148225f49c9ff398ff9a40fd" +checksum = "20e9c5aa7024a63af9ae89ee8c0f23c8421b7896742e5cd4a271a60f9956cb80" dependencies = [ "arrow", "arrow-arith", @@ -5596,9 +5288,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eb0ccc1c414e31687d83992d546af0a0237c8d2f4bf2ae3d347d539fd0fc141" +checksum = "c7d2af0b17fb374a8181bcf1a10bce5703ae3ee4373c1587ce4bba23e15e45c8" dependencies = [ "arrow", "arrow-arith", @@ -5610,8 +5302,6 @@ dependencies = [ "arrow-select", "async-recursion", "async-trait", - "aws-config", - "aws-credential-types", "byteorder", "bytes", "chrono", @@ -5622,8 +5312,6 @@ dependencies = [ "lance-namespace", "log", "object_store", - "object_store_opendal", - "opendal", "path_abs", "pin-project", "prost 0.14.3", @@ -5638,9 +5326,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "083404cf12dcdb1a7df98fb58f9daf626b6e43a2f794b37b6b89b4012a0e1f78" +checksum = "5125aa62696e75a7475807564b4921f252d8815be606b84bc00e6def0f5c24bb" dependencies = [ "arrow-array", "arrow-buffer", @@ -5656,9 +5344,9 @@ dependencies = [ 
[[package]] name = "lance-namespace" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c12778d2aabf9c2bfd16e2509ebe120e562a288d8ae630ec6b6b204868df41b2" +checksum = "70545c2676ce954dfd801da5c6a631a70bba967826cd3a8f31b47d1f04bbfed3" dependencies = [ "arrow", "async-trait", @@ -5683,9 +5371,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0fcc83f197ce2000c4abe4f5e0873490ab1f41788fa76571c4209b87d4daf50" +checksum = "b06ad37bd90045de8ef533df170c6098e6ff6ecb427aade47d7db8e2c86f2678" dependencies = [ "arrow", "arrow-array", @@ -5725,9 +5413,12 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -dependencies = [ - "spin", -] + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lending-iterator" @@ -5825,15 +5516,15 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.180" +version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" [[package]] name = "libfuzzer-sys" -version = "0.4.10" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5037190e1f70cbeef565bd267599242926f724d3b8a9f510fd7e0b540cfa4404" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" dependencies = [ "arbitrary", "cc", @@ -5861,9 +5552,9 @@ dependencies = [ [[package]] name = "liblzma" -version = "0.4.5" 
+version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" +checksum = "b6033b77c21d1f56deeae8014eb9fbe7bdf1765185a6c508b5ca82eeaed7f899" dependencies = [ "liblzma-sys", ] @@ -5901,9 +5592,21 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "libc", - "redox_syscall 0.7.0", + "redox_syscall 0.7.2", +] + +[[package]] +name = "libtest-mimic" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" +dependencies = [ + "anstream", + "anstyle", + "clap", + "escape8259", ] [[package]] @@ -5912,7 +5615,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f4de44e98ddbf09375cbf4d17714d18f39195f4f4894e8524501726fd9a8a4a" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -5932,9 +5635,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" @@ -6113,15 +5816,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -6202,9 +5905,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.13" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac832c50ced444ef6be0767a008b02c106a909ba79d1d830501e94b96f6b7e" +checksum = "85f8024e1c8e71c778968af91d43700ce1d11b219d127d79fb2934153b82b42b" dependencies = [ "async-lock", "crossbeam-channel", @@ -6244,7 +5947,7 @@ checksum = "b093064383341eb3271f42e381cb8f10a01459478446953953c75d24bd339fc0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "target-features", ] @@ -6256,17 +5959,17 @@ checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" [[package]] name = "native-tls" -version = "0.2.14" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" dependencies = [ "libc", "log", "openssl", - "openssl-probe 0.1.6", + "openssl-probe", "openssl-sys", "schannel", - "security-framework 2.11.1", + "security-framework", "security-framework-sys", "tempfile", ] @@ -6298,7 +6001,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "cfg_aliases", "libc", @@ -6311,7 +6014,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.10.0", + "bitflags 
2.11.0", "cfg-if", "cfg_aliases", "libc", @@ -6428,9 +6131,9 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c70f219e21142367c70c0b30c6a9e3a14d55b4d12a204d897fbec83a0363f081" +checksum = "c3b335231dfd352ffb0f8017f3b6027a4917f7df785ea2143d8af2adc66980ae" dependencies = [ "winapi", ] @@ -6441,7 +6144,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -6452,22 +6155,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", -] - -[[package]] -name = "num-bigint-dig" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" -dependencies = [ - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "smallvec", - "zeroize", + "serde", ] [[package]] @@ -6493,7 +6181,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6505,17 +6193,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -6555,7 +6232,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6573,7 +6250,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -6586,6 +6263,15 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.5" @@ -6598,7 +6284,7 @@ dependencies = [ "chrono", "form_urlencoded", "futures", - "http 1.4.0", + "http", "http-body-util", "httparse", "humantime", @@ -6607,7 +6293,7 @@ dependencies = [ "md-5", "parking_lot", "percent-encoding", - "quick-xml 0.38.4", + "quick-xml", "rand 0.9.2", "reqwest", "ring", @@ -6624,22 +6310,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "object_store_opendal" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "113ab0769e972eee585e57407b98de08bda5354fa28e8ba4d89038d6cb6a8991" -dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "object_store", - "opendal", - "pin-project", - "tokio", -] - [[package]] name = "once_cell" version = "1.21.3" @@ -6667,43 +6337,13 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" -[[package]] -name = "opendal" -version = "0.55.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d075ab8a203a6ab4bc1bce0a4b9fe486a72bf8b939037f4b78d95386384bc80a" -dependencies = [ - "anyhow", - "backon", - "base64", - "bytes", - "crc32c", - "futures", - "getrandom 0.2.17", - "http 1.4.0", - "http-body 1.0.1", - "jiff", - "log", - "md-5", - "percent-encoding", - "quick-xml 0.38.4", - "reqsign", - "reqwest", - "serde", - "serde_json", - "sha2", - "tokio", - "url", - "uuid", -] - [[package]] name = "openssl" version = "0.10.75" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cfg-if", "foreign-types", "libc", @@ -6720,15 +6360,9 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - [[package]] name = "openssl-probe" version = "0.2.1" @@ -6769,7 +6403,7 @@ checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" dependencies = [ "async-trait", "bytes", - "http 1.4.0", + "http", "opentelemetry", "reqwest", ] @@ -6780,7 +6414,7 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" dependencies = [ - "http 1.4.0", + "http", "opentelemetry", "opentelemetry-http", "opentelemetry-proto", @@ -6852,22 +6486,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ordered-multimap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" -dependencies = [ - "dlv-list", - "hashbrown 0.14.5", -] - -[[package]] -name = "outref" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" - [[package]] name = "ownedbytes" version = "0.9.0" @@ -6877,6 +6495,12 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "owo-colors" +version = "4.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d211803b9b6b570f68772237e415a029d5a50c65d382910b879fb19d3271f94d" + [[package]] name = 
"papergrid" version = "0.17.0" @@ -6885,7 +6509,7 @@ checksum = "6978128c8b51d8f4080631ceb2302ab51e32cc6e8615f735ee2f83fd269ae3f1" dependencies = [ "bytecount", "fnv", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -6913,7 +6537,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -6947,9 +6571,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -7001,44 +6625,62 @@ dependencies = [ ] [[package]] -name = "pbkdf2" -version = "0.12.2" +name = "pbjson" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "digest", - "hmac", + "base64", + "serde", ] [[package]] -name = "pco" -version = "0.4.9" +name = "pbjson-build" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42382de9fb564e2d10cb4d5ca97cc06d928f0f9667bbef456b57e60827b6548b" +checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ - "better_io", - "dtype_dispatch", - "half", - "rand_xoshiro 0.6.0", + "heck", + "itertools 0.14.0", + "prost 0.14.3", + "prost-types", ] [[package]] -name = "pem" -version = "3.0.6" +name = "pbjson-types" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ - "base64", - "serde_core", + "bytes", + "chrono", + 
"pbjson", + "pbjson-build", + "prost 0.14.3", + "prost-build", + "serde", ] [[package]] -name = "pem-rfc7468" -version = "0.7.0" +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", +] + +[[package]] +name = "pco" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +checksum = "e89d71ab3c07ed898defa4915bdc2a963131d811a1eab0eeacfac65c94cdeae8" dependencies = [ - "base64ct", + "better_io", + "dtype_dispatch", + "half", + "rand_xoshiro 0.6.0", ] [[package]] @@ -7055,9 +6697,9 @@ checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" [[package]] name = "pest" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -7065,9 +6707,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -7075,22 +6717,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = 
"pest_meta" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", @@ -7157,7 +6799,7 @@ dependencies = [ "phf_shared 0.11.3", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7195,14 +6837,14 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] name = "pin-project-lite" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pin-utils" @@ -7232,44 +6874,6 @@ dependencies = [ "futures-io", ] -[[package]] -name = "pkcs1" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" -dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs5" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" -dependencies = [ - "aes", - "cbc", - "der", - "pbkdf2", - "scrypt", - "sha2", - "spki", -] - -[[package]] -name = "pkcs8" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "pkcs5", - "rand_core 0.6.4", - "spki", -] - [[package]] name = "pkg-config" version = "0.3.32" @@ -7314,7 +6918,7 @@ dependencies = [ "concurrent-queue", "hermit-abi", "pin-project-lite", - "rustix 1.1.3", + "rustix 1.1.4", "windows-sys 
0.61.2", ] @@ -7376,7 +6980,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7418,7 +7022,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7465,7 +7069,7 @@ dependencies = [ "prost 0.14.3", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.117", "tempfile", ] @@ -7479,7 +7083,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7492,7 +7096,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7504,6 +7108,16 @@ dependencies = [ "prost 0.14.3", ] +[[package]] +name = "psm" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3852766467df634d74f0b2d7819bf8dc483a0eb2e3b0f50f756f9cfe8b0d18d8" +dependencies = [ + "ar_archive_writer", + "cc", +] + [[package]] name = "ptr_meta" version = "0.1.4" @@ -7524,6 +7138,20 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "public-api" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4597eaaa646a5df48e5f37924b58bd4b2e2a3746d4665195d03538846784858" +dependencies = [ + "hashbag", + "rustdoc-types", + "serde", + "serde_json", + "snapshot-testing", + "thiserror 2.0.18", +] + [[package]] name = "pyo3" version = "0.27.2" @@ -7587,9 +7215,9 @@ dependencies = [ [[package]] name = "pyo3-log" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f8bae9ad5ba08b0b0ed2bb9c2bdbaeccc69cafca96d78cf0fbcea0d45d122bb" +checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" dependencies = [ "arc-swap", "log", @@ -7605,7 +7233,7 @@ dependencies = [ "proc-macro2", 
"pyo3-macros-backend", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7618,7 +7246,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -7631,7 +7259,7 @@ dependencies = [ "bytes", "chrono", "futures", - "http 1.4.0", + "http", "humantime", "itertools 0.14.0", "object_store", @@ -7645,14 +7273,10 @@ dependencies = [ ] [[package]] -name = "quick-xml" -version = "0.37.5" +name = "quad-rand" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" -dependencies = [ - "memchr", - "serde", -] +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" @@ -7675,7 +7299,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "socket2 0.6.2", "thiserror 2.0.18", @@ -7695,7 +7319,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "rustls-pki-types", "slab", @@ -7842,14 +7466,12 @@ name = "random-access-bench" version = "0.1.0" dependencies = [ "anyhow", - "async-trait", "clap", "indicatif", "lance-bench", "rand 0.9.2", "rand_distr 0.5.1", "tokio", - "vortex", "vortex-bench", ] @@ -7892,7 +7514,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "compact_str", "hashbrown 0.16.1", "indoc", @@ -7903,7 +7525,7 @@ dependencies = [ "thiserror 2.0.18", "unicode-segmentation", "unicode-truncate", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7944,7 +7566,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7dbfa023cd4e604c2553483820c5fe8aa9d71a42eea5aa77c6e7f35756612db" dependencies = [ - "bitflags 
2.10.0", + "bitflags 2.11.0", "hashbrown 0.16.1", "indoc", "instability", @@ -7954,7 +7576,7 @@ dependencies = [ "strum 0.27.2", "time", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -7983,22 +7605,42 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] name = "redox_syscall" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" +checksum = "6d94dd2f7cd932d4dc02cc8b2b50dfd38bd079a4e5d79198b99743d7fcf9a4b4" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -8029,7 +7671,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8046,9 +7688,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", 
"memchr", @@ -8057,15 +7699,25 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "regress" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" +dependencies = [ + "hashbrown 0.16.1", + "memchr", +] [[package]] name = "relative-path" @@ -8082,38 +7734,6 @@ dependencies = [ "bytecheck", ] -[[package]] -name = "reqsign" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" -dependencies = [ - "anyhow", - "async-trait", - "base64", - "chrono", - "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", - "http 1.4.0", - "jsonwebtoken", - "log", - "once_cell", - "percent-encoding", - "quick-xml 0.37.5", - "rand 0.8.5", - "reqwest", - "rsa", - "rust-ini", - "serde", - "serde_json", - "sha1", - "sha2", - "tokio", -] - [[package]] name = "reqwest" version = "0.12.28" @@ -8127,8 +7747,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "hyper", "hyper-rustls", @@ -8233,27 +7853,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e27ee8bb91ca0adcf0ecb116293afa12d393f9c2b9b9cd54d33e8078fe19839" -[[package]] -name = "rsa" -version = 
"0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" -dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core 0.6.4", - "sha2", - "signature", - "spki", - "subtle", - "zeroize", -] - [[package]] name = "rstar" version = "0.12.2" @@ -8290,7 +7889,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.114", + "syn 2.0.117", "unicode-ident", ] @@ -8302,17 +7901,7 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.114", -] - -[[package]] -name = "rust-ini" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" -dependencies = [ - "cfg-if", - "ordered-multimap", + "syn 2.0.117", ] [[package]] @@ -8338,14 +7927,8 @@ dependencies = [ "rand 0.8.5", "rkyv", "serde", - "serde_json", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + "serde_json", +] [[package]] name = "rustc-hash" @@ -8368,13 +7951,23 @@ dependencies = [ "semver", ] +[[package]] +name = "rustdoc-types" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27bf787c529efe523ed9eb6dcdbaa5954d34329f08d5c243fce928441826ca90" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rustix" version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.4.15", @@ -8383,24 +7976,23 @@ dependencies = [ [[package]] name = "rustix" 
-version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", - "linux-raw-sys 0.11.0", - "windows-sys 0.61.2", + "linux-raw-sys 0.12.1", + "windows-sys 0.52.0", ] [[package]] name = "rustls" -version = "0.23.36" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ - "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", @@ -8415,10 +8007,10 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe 0.2.1", + "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.5.1", + "security-framework", ] [[package]] @@ -8446,7 +8038,6 @@ version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ - "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -8460,26 +8051,26 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] -name = "salsa20" -version = "0.10.2" +name = "same-file" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ - "cipher", + "winapi-util", ] [[package]] -name = "same-file" -version = "1.0.6" +name = "scc" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc" dependencies = [ - "winapi-util", + "sdd", ] [[package]] @@ -8491,6 +8082,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + [[package]] name = "schemars" version = "1.2.1" @@ -8503,6 +8106,18 @@ dependencies = [ "serde_json", ] +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + [[package]] name = "scoped-tls" version = "1.0.1" @@ -8522,15 +8137,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d68f2ec51b097e4c1a75b681a8bec621909b5e91f15bb7b840c4f2f7b01148b2" [[package]] -name = "scrypt" -version = "0.11.0" +name = "sdd" +version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" -dependencies = [ - "pbkdf2", - "salsa20", - "sha2", -] +checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" [[package]] name = "seahash" @@ -8540,24 +8150,11 @@ checksum = 
"1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework" -version = "3.5.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -8566,9 +8163,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -8579,6 +8176,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "seq-macro" @@ -8596,6 +8197,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" +dependencies = [ + "serde", + "serde_core", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -8613,7 +8224,18 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -8637,7 +8259,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8658,6 +8280,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c49585c52c01f13c5c2ebb333f14f6885d76daa768d8a037d28017ec538c69" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.117", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -8670,6 +8304,45 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "serial_test" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" +dependencies = [ + "futures-executor", + "futures-util", + "log", + "once_cell", + "parking_lot", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "sha1" version = "0.10.6" @@ -8703,9 +8376,9 @@ dependencies = [ [[package]] name = "shellexpand" -version 
= "3.1.1" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" dependencies = [ "dirs", ] @@ -8747,16 +8420,6 @@ dependencies = [ "libc", ] -[[package]] -name = "signature" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" -dependencies = [ - "digest", - "rand_core 0.6.4", -] - [[package]] name = "simd-adler32" version = "0.3.8" @@ -8789,18 +8452,6 @@ dependencies = [ "similar", ] -[[package]] -name = "simple_asn1" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" -dependencies = [ - "num-bigint", - "num-traits", - "thiserror 2.0.18", - "time", -] - [[package]] name = "siphasher" version = "1.0.2" @@ -8809,9 +8460,9 @@ checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "sketches-ddsketch" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" dependencies = [ "serde", ] @@ -8872,7 +8523,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8881,6 +8532,16 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +[[package]] +name = "snapshot-testing" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bebf2194b9611339d00b28260cf6bd640073c60179ce7dd1e47badef1eb606e7" +dependencies = [ + "console 0.15.11", + 
"similar-asserts", +] + [[package]] name = "socket2" version = "0.4.10" @@ -8914,19 +8575,28 @@ dependencies = [ ] [[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - -[[package]] -name = "spki" -version = "0.7.3" +name = "sqllogictest" +version = "0.28.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +checksum = "3566426f72a13e393aa34ca3d542c5b0eb86da4c0db137ee9b5cfccc6179e52d" dependencies = [ - "base64ct", - "der", + "async-trait", + "educe", + "fs-err", + "futures", + "glob", + "humantime", + "itertools 0.13.0", + "libtest-mimic", + "md-5", + "owo-colors", + "rand 0.8.5", + "regex", + "similar", + "subst", + "tempfile", + "thiserror 2.0.18", + "tracing", ] [[package]] @@ -8936,6 +8606,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -8947,7 +8618,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -8956,6 +8627,19 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "stacker" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d74a23609d509411d10e2176dc2a4346e3b4aea2e7b1869f19fdedbc71c013" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -9018,7 +8702,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9030,7 
+8714,42 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", +] + +[[package]] +name = "subst" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a9a86e5144f63c2d18334698269a8bfae6eece345c70b64821ea5b35054ec99" +dependencies = [ + "memchr", + "unicode-width 0.1.14", +] + +[[package]] +name = "substrait" +version = "0.62.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62fc4b483a129b9772ccb9c3f7945a472112fdd9140da87f8a4e7f1d44e045d0" +dependencies = [ + "heck", + "pbjson", + "pbjson-build", + "pbjson-types", + "prettyplease", + "prost 0.14.3", + "prost-build", + "prost-types", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_yaml", + "syn 2.0.117", + "typify", + "walkdir", ] [[package]] @@ -9052,9 +8771,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -9078,7 +8797,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9115,7 +8834,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -9197,7 +8916,7 @@ dependencies = [ "rayon", "regex", "rust-stemmers", - "rustc-hash 2.1.1", + "rustc-hash", "serde", "serde_json", "sketches-ddsketch", @@ -9335,21 +9054,30 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "target-lexicon" 
-version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" + +[[package]] +name = "temp-env" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" +dependencies = [ + "parking_lot", +] [[package]] name = "tempfile" -version = "3.24.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix 1.1.3", - "windows-sys 0.61.2", + "rustix 1.1.4", + "windows-sys 0.52.0", ] [[package]] @@ -9367,7 +9095,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b8cb979cb11c32ce1603f8137b22262a9d131aaa5c37b5678025f22b8becd0" dependencies = [ - "rustix 1.1.3", + "rustix 1.1.4", "windows-sys 0.60.2", ] @@ -9406,7 +9134,7 @@ checksum = "4676b37242ccbd1aabf56edb093a4827dc49086c0ffd764a5705899e0f35f8f7" dependencies = [ "anyhow", "base64", - "bitflags 2.10.0", + "bitflags 2.11.0", "fancy-regex", "filedescriptor", "finl_unicode", @@ -9455,7 +9183,7 @@ dependencies = [ "quote", "regex", "reqwest", - "syn 2.0.114", + "syn 2.0.117", "sysinfo 0.35.2", "uzers", "which", @@ -9467,7 +9195,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f8daae29995a24f65619e19d8d31dea5b389f3d853d8bf297bbf607cd0014cc" dependencies = [ - "unicode-width", + "unicode-width 0.2.2", ] [[package]] @@ -9496,7 +9224,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 
2.0.117", ] [[package]] @@ -9507,7 +9235,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9651,7 +9379,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9700,9 +9428,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.11+spec-1.1.0" +version = "0.9.12+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46" +checksum = "cf92845e79fc2e2def6a5d828f0801e29a2f8acc037becc5ab08595c7d5e9863" dependencies = [ "indexmap", "serde_core", @@ -9736,9 +9464,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] @@ -9751,15 +9479,15 @@ checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" [[package]] name = "tonic" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a286e33f82f8a1ee2df63f4fa35c0becf4a85a0cb03091a15fd7bf0b402dc94a" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "base64", "bytes", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "percent-encoding", "pin-project", @@ -9772,9 +9500,9 @@ dependencies = [ [[package]] name = "tonic-prost" -version = "0.14.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6c55a2d6a14174563de34409c9f92ff981d006f56da9c6ecd40d9d4a31500b0" 
+checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" dependencies = [ "bytes", "prost 0.14.3", @@ -9803,12 +9531,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "async-compression", - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "iri-string", "pin-project-lite", @@ -9867,7 +9595,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -9907,6 +9635,16 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" @@ -9917,12 +9655,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -9946,6 +9687,53 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typify" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" +dependencies = [ + "typify-impl", + "typify-macro", +] + +[[package]] +name = "typify-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" +dependencies = [ + "heck", + "log", + 
"proc-macro2", + "quote", + "regress", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "syn 2.0.117", + "thiserror 2.0.18", + "unicode-ident", +] + +[[package]] +name = "typify-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" +dependencies = [ + "proc-macro2", + "quote", + "schemars 0.8.22", + "semver", + "serde", + "serde_json", + "serde_tokenstream", + "syn 2.0.117", + "typify-impl", +] + [[package]] name = "ucd-trie" version = "0.1.7" @@ -9973,9 +9761,9 @@ checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" @@ -9991,9 +9779,15 @@ checksum = "16b380a1238663e5f8a691f9039c73e1cdae598a30e9855f541d29b08b53e9a5" dependencies = [ "itertools 0.14.0", "unicode-segmentation", - "unicode-width", + "unicode-width 0.2.2", ] +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.2" @@ -10018,6 +9812,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -10036,12 +9836,6 @@ dependencies = [ "serde", ] -[[package]] -name = 
"urlencoding" -version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8-ranges" version = "1.0.5" @@ -10068,12 +9862,12 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.20.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ "atomic", - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", "serde_core", "wasm-bindgen", @@ -10115,7 +9909,6 @@ dependencies = [ "arrow-array", "codspeed-divan-compat", "fastlanes", - "itertools 0.14.0", "mimalloc", "parquet", "rand 0.9.2", @@ -10129,12 +9922,8 @@ dependencies = [ "vortex-btrblocks", "vortex-buffer", "vortex-bytebool", - "vortex-compute", - "vortex-cuda", - "vortex-cuda-macros", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dtype", "vortex-error", "vortex-fastlanes", "vortex-file", @@ -10148,7 +9937,6 @@ dependencies = [ "vortex-pco", "vortex-proto", "vortex-runend", - "vortex-scalar", "vortex-scan", "vortex-sequence", "vortex-session", @@ -10168,14 +9956,12 @@ dependencies = [ "prost 0.14.3", "rand 0.9.2", "rstest", - "rustc-hash 2.1.1", + "rustc-hash", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-fastlanes", "vortex-mask", - "vortex-scalar", "vortex-session", "vortex-utils", ] @@ -10195,46 +9981,51 @@ dependencies = [ "arrow-schema", "arrow-select", "arrow-string", + "async-lock", + "bytes", "cfg-if", "codspeed-divan-compat", + "cudarc", "enum-iterator", "flatbuffers", "futures", "getrandom 0.3.4", "goldenfile", + "half", "humansize", "insta", "inventory", "itertools 0.14.0", + "jiff", "multiversion", "num-traits", "num_enum", "parking_lot", "paste", 
"pin-project-lite", + "primitive-types", "prost 0.14.3", "rand 0.9.2", "rand_distr 0.5.1", "rstest", "rstest_reuse", - "rustc-hash 2.1.1", + "rustc-hash", "serde", + "serde_json", + "serde_test", "simdutf8", + "static_assertions", "tabled", "termtree", "tracing", "vortex-array", "vortex-buffer", - "vortex-compute", - "vortex-dtype", "vortex-error", "vortex-flatbuffers", "vortex-mask", "vortex-proto", - "vortex-scalar", "vortex-session", "vortex-utils", - "vortex-vector", ] [[package]] @@ -10292,23 +10083,21 @@ dependencies = [ "num-traits", "pco", "rand 0.9.2", - "rustc-hash 2.1.1", + "rstest", + "rustc-hash", "test-with", "tracing", - "tracing-subscriber", "vortex-alp", "vortex-array", "vortex-buffer", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dtype", "vortex-error", "vortex-fastlanes", "vortex-fsst", "vortex-mask", "vortex-pco", "vortex-runend", - "vortex-scalar", "vortex-sequence", "vortex-sparse", "vortex-utils", @@ -10342,44 +10131,19 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", - "vortex-mask", - "vortex-scalar", "vortex-session", ] -[[package]] -name = "vortex-compute" -version = "0.1.0" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", - "codspeed-divan-compat", - "half", - "itertools 0.14.0", - "multiversion", - "num-traits", - "paste", - "rstest", - "tracing", - "vortex-buffer", - "vortex-dtype", - "vortex-error", - "vortex-mask", - "vortex-vector", -] - [[package]] name = "vortex-cub" version = "0.1.0" dependencies = [ - "bindgen 0.72.1", + "bindgen", "libloading 0.8.9", "paste", + "vortex-array", "vortex-cuda-macros", - "vortex-dtype", ] [[package]] @@ -10387,48 +10151,35 @@ name = "vortex-cuda" version = "0.1.0" dependencies = [ "arc-swap", - "arrow-data", - "arrow-schema", "async-trait", + "bindgen", + "bytes", "codspeed-criterion-compat-walltime", "cudarc", "fastlanes", "futures", "kanal", - "paste", + "object_store", + "parking_lot", + "prost 0.14.3", 
"rstest", "tokio", "tracing", - "vortex-alp", + "vortex", "vortex-array", - "vortex-buffer", "vortex-cub", "vortex-cuda", "vortex-cuda-macros", - "vortex-datetime-parts", - "vortex-decimal-byte-parts", - "vortex-dtype", "vortex-error", - "vortex-fastlanes", - "vortex-io", - "vortex-mask", "vortex-nvcomp", - "vortex-runend", - "vortex-scalar", - "vortex-sequence", - "vortex-session", - "vortex-utils", - "vortex-zigzag", - "vortex-zstd", ] [[package]] name = "vortex-cuda-macros" version = "0.1.0" dependencies = [ - "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -10440,7 +10191,6 @@ dependencies = [ "arrow-schema", "async-fs", "cxx", - "cxx-build", "futures", "paste", "take_mut", @@ -10454,7 +10204,6 @@ dependencies = [ "anyhow", "arrow-schema", "async-trait", - "chrono", "datafusion 52.1.0", "datafusion-catalog 52.1.0", "datafusion-common 52.1.0", @@ -10471,7 +10220,6 @@ dependencies = [ "futures", "insta", "itertools 0.14.0", - "moka", "object_store", "rstest", "tempfile", @@ -10481,7 +10229,6 @@ dependencies = [ "url", "vortex", "vortex-utils", - "walkdir", ] [[package]] @@ -10493,10 +10240,8 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", ] @@ -10509,43 +10254,9 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", - "vortex-session", -] - -[[package]] -name = "vortex-dtype" -version = "0.1.0" -dependencies = [ - "arbitrary", - "arcref", - "arrow-buffer", - "arrow-schema", - "cudarc", - "flatbuffers", - "half", - "insta", - "itertools 0.14.0", - "jiff", - "num-traits", - "num_enum", - "paste", - "primitive-types", - "prost 0.14.3", - "rstest", - "serde", - "serde_json", - "serde_test", - "static_assertions", - "vortex-buffer", - "vortex-error", - "vortex-flatbuffers", - "vortex-proto", "vortex-session", - "vortex-utils", ] [[package]] @@ -10553,17 +10264,17 @@ 
name = "vortex-duckdb" version = "0.1.0" dependencies = [ "anyhow", - "async-compat", "async-fs", - "bindgen 0.72.1", + "async-trait", + "bindgen", "bitvec", "cbindgen", "cc", "custom-labels", "futures", - "glob", "itertools 0.14.0", "jiff", + "kanal", "num-traits", "object_store", "once_cell", @@ -10592,10 +10303,9 @@ dependencies = [ "jiff", "object_store", "prost 0.14.3", - "pyo3", - "serde_json", + "serial_test", + "temp-env", "tokio", - "url", ] [[package]] @@ -10614,12 +10324,9 @@ dependencies = [ "vortex-alp", "vortex-array", "vortex-buffer", - "vortex-compute", - "vortex-dtype", "vortex-error", "vortex-fastlanes", "vortex-mask", - "vortex-scalar", "vortex-session", ] @@ -10653,6 +10360,7 @@ dependencies = [ "getrandom 0.3.4", "itertools 0.14.0", "kanal", + "moka", "object_store", "oneshot", "parking_lot", @@ -10665,21 +10373,18 @@ dependencies = [ "vortex-btrblocks", "vortex-buffer", "vortex-bytebool", - "vortex-cuda", - "vortex-cuda-macros", "vortex-datetime-parts", "vortex-decimal-byte-parts", - "vortex-dtype", "vortex-error", "vortex-fastlanes", "vortex-flatbuffers", "vortex-fsst", "vortex-io", "vortex-layout", + "vortex-mask", "vortex-metrics", "vortex-pco", "vortex-runend", - "vortex-scalar", "vortex-scan", "vortex-sequence", "vortex-session", @@ -10704,16 +10409,13 @@ version = "0.1.0" dependencies = [ "codspeed-divan-compat", "fsst-rs", - "num-traits", "prost 0.14.3", "rand 0.9.2", "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", ] @@ -10731,14 +10433,11 @@ dependencies = [ "vortex-btrblocks", "vortex-buffer", "vortex-cuda", - "vortex-dtype", "vortex-error", "vortex-file", "vortex-io", - "vortex-layout", "vortex-mask", "vortex-runend", - "vortex-scalar", "vortex-session", "vortex-utils", ] @@ -10748,7 +10447,6 @@ name = "vortex-io" version = "0.1.0" dependencies = [ "anyhow", - "async-compat", "async-fs", "async-stream", "async-trait", @@ -10756,6 +10454,7 @@ dependencies 
= [ "custom-labels", "futures", "getrandom 0.3.4", + "glob", "handle", "itertools 0.14.0", "kanal", @@ -10788,7 +10487,6 @@ dependencies = [ "tokio", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-flatbuffers", "vortex-session", @@ -10833,7 +10531,7 @@ dependencies = [ "pin-project-lite", "prost 0.14.3", "rstest", - "rustc-hash 2.1.1", + "rustc-hash", "termtree", "tokio", "tracing", @@ -10841,14 +10539,11 @@ dependencies = [ "vortex-array", "vortex-btrblocks", "vortex-buffer", - "vortex-decimal-byte-parts", - "vortex-dtype", "vortex-error", "vortex-flatbuffers", "vortex-io", "vortex-mask", "vortex-metrics", - "vortex-scalar", "vortex-sequence", "vortex-session", "vortex-utils", @@ -10873,14 +10568,13 @@ dependencies = [ "getrandom 0.3.4", "parking_lot", "sketches-ddsketch", - "vortex-session", ] [[package]] name = "vortex-nvcomp" version = "0.1.0" dependencies = [ - "bindgen 0.72.1", + "bindgen", "libloading 0.8.9", "liblzma", "reqwest", @@ -10892,18 +10586,13 @@ dependencies = [ name = "vortex-pco" version = "0.1.0" dependencies = [ - "codspeed-divan-compat", - "mimalloc", "pco", "prost 0.14.3", - "rand 0.9.2", "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", ] @@ -10932,10 +10621,8 @@ dependencies = [ "pyo3-log", "pyo3-object_store", "tokio", - "tracing", "url", "vortex", - "vortex-array", "vortex-tui", ] @@ -10945,6 +10632,7 @@ version = "0.1.0" dependencies = [ "arbitrary", "arrow-array", + "arrow-schema", "codspeed-divan-compat", "itertools 0.14.0", "num-traits", @@ -10953,32 +10641,9 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", - "vortex-error", - "vortex-mask", - "vortex-scalar", - "vortex-session", -] - -[[package]] -name = "vortex-scalar" -version = "0.1.0" -dependencies = [ - "arbitrary", - "arrow-array", - "bytes", - "itertools 0.14.0", - "num-traits", - "paste", - "prost 0.14.3", - "rstest", - "vortex-buffer", - 
"vortex-dtype", "vortex-error", "vortex-mask", - "vortex-proto", "vortex-session", - "vortex-utils", ] [[package]] @@ -10997,7 +10662,6 @@ dependencies = [ "tracing", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-io", "vortex-layout", @@ -11016,14 +10680,9 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", - "vortex-file", - "vortex-layout", "vortex-mask", "vortex-proto", - "vortex-runend", - "vortex-scalar", "vortex-session", ] @@ -11048,24 +10707,37 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", ] +[[package]] +name = "vortex-sqllogictest" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "clap", + "datafusion 52.1.0", + "datafusion-sqllogictest", + "futures", + "indicatif", + "sqllogictest", + "thiserror 2.0.18", + "tokio", + "vortex", + "vortex-datafusion", + "vortex-duckdb", +] + [[package]] name = "vortex-test-e2e" version = "0.1.0" dependencies = [ "futures", - "rstest", "tokio", "vortex", - "vortex-array", - "vortex-error", - "vortex-file", ] [[package]] @@ -11116,19 +10788,6 @@ dependencies = [ "vortex-error", ] -[[package]] -name = "vortex-vector" -version = "0.1.0" -dependencies = [ - "num-traits", - "paste", - "static_assertions", - "vortex-buffer", - "vortex-dtype", - "vortex-error", - "vortex-mask", -] - [[package]] name = "vortex-zigzag" version = "0.1.0" @@ -11136,10 +10795,8 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", "zigzag", ] @@ -11154,20 +10811,12 @@ dependencies = [ "rstest", "vortex-array", "vortex-buffer", - "vortex-dtype", "vortex-error", "vortex-mask", - "vortex-scalar", "vortex-session", "zstd", ] -[[package]] -name = "vsimd" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" - [[package]] name = "vtparse" version = "0.6.2" @@ -11211,11 +10860,20 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen", +] + [[package]] name = "wasm-bindgen" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -11226,9 +10884,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.58" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", "futures-util", @@ -11240,9 +10898,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -11250,26 +10908,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 
2.0.117", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.108" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -11283,11 +10963,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.85" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -11305,9 +10997,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" 
dependencies = [ "rustls-pki-types", ] @@ -11391,7 +11083,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fabb953106c3c8eea8306e4393700d7657561cb43122571b172bbfb7c7ba1d" dependencies = [ "env_home", - "rustix 1.1.3", + "rustix 1.1.4", "winsafe", ] @@ -11417,7 +11109,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -11493,7 +11185,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -11504,7 +11196,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -11836,6 +11528,88 @@ name = "wit-bindgen" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn 2.0.117", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "wkb" @@ -11884,15 +11658,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.3", + "rustix 1.1.4", ] -[[package]] -name = "xmlparser" -version = "0.13.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" - [[package]] name = "xshell" version = "0.2.7" @@ -11913,8 +11681,10 @@ name = "xtask" version = "0.1.0" dependencies = [ "anyhow", + "cargo_metadata", "clap", "prost-build", + "public-api", "xshell", ] @@ -11949,28 +11719,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.37" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.37" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -11990,7 +11760,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", "synstructure", ] @@ -12011,7 +11781,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -12044,7 +11814,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.117", ] [[package]] @@ -12085,15 +11855,15 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" [[package]] name = "zmij" -version = "1.0.19" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zopfli" diff --git a/benchmarks/clickhouse-bench/Cargo.toml b/benchmarks/clickhouse-bench/Cargo.toml index 789cce5a69f..7b26ae12053 100644 --- a/benchmarks/clickhouse-bench/Cargo.toml +++ 
b/benchmarks/clickhouse-bench/Cargo.toml @@ -18,8 +18,5 @@ tokio = { workspace = true, features = ["full"] } tracing = { workspace = true } vortex-bench = { workspace = true } -[build-dependencies] -reqwest = { workspace = true, features = ["blocking"] } - [lints] workspace = true diff --git a/benchmarks/clickhouse-bench/build.rs b/benchmarks/clickhouse-bench/build.rs index 917d248f9d8..7ef98c8e48d 100644 --- a/benchmarks/clickhouse-bench/build.rs +++ b/benchmarks/clickhouse-bench/build.rs @@ -1,117 +1,18 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -//! Build script that downloads a full ClickHouse binary (with Parquet support) -//! into the target directory. The binary path is exported via -//! `cargo:rustc-env=CLICKHOUSE_BINARY=...` so that `lib.rs` can locate it at runtime -//! via `env!("CLICKHOUSE_BINARY")` without any user-installed dependency. -//! -//! The approach mirrors `vortex-duckdb/build.rs` which auto-downloads a DuckDB dylib. +//! Build script that exports the ClickHouse binary path. //! //! Resolution order: -//! 1. `CLICKHOUSE_LOCAL` env var — use as-is (skip download). -//! 2. Download from `builds.clickhouse.com` (official master builds) into -//! `target/clickhouse-local/clickhouse`. +//! 1. `CLICKHOUSE_BINARY` env var — use as-is. +//! 2. Falls back to `"clickhouse"` (i.e., resolve from `$PATH` at runtime). //! -//! We use the official master builds because macOS binaries are only available -//! from `builds.clickhouse.com`, not from the tgz/stable package repos. - -#![allow(clippy::unwrap_used)] -#![allow(clippy::expect_used)] -#![allow(clippy::panic)] - -use std::env; -use std::fs; -use std::os::unix::fs::PermissionsExt; -use std::path::PathBuf; - -/// Returns the download URL for the clickhouse binary based on the compilation target. 
-fn download_url() -> Result> { - let target = env::var("TARGET")?; - let dir = match target.as_str() { - "x86_64-apple-darwin" => "macos", - "aarch64-apple-darwin" => "macos-aarch64", - "x86_64-unknown-linux-gnu" => "amd64", - "aarch64-unknown-linux-gnu" => "aarch64", - other => return Err(format!("Unsupported target for clickhouse download: {other}").into()), - }; - Ok(format!( - "https://builds.clickhouse.com/master/{dir}/clickhouse" - )) -} - -/// Get the base target directory for ClickHouse artifacts. -fn target_dir() -> PathBuf { - let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()); - manifest_dir.parent().unwrap().parent().unwrap().join("target") -} +//! Users must install ClickHouse themselves for local runs. +//! In CI, it is installed via the workflow before the benchmark step. fn main() { - println!("cargo:rerun-if-env-changed=CLICKHOUSE_LOCAL"); - - // If the user explicitly provides a binary path, just export it. - if let Ok(path) = env::var("CLICKHOUSE_LOCAL") { - println!("cargo:rustc-env=CLICKHOUSE_BINARY={path}"); - return; - } - - let ch_dir = target_dir().join("clickhouse-local"); - let binary_path = ch_dir.join("clickhouse"); - - // If the binary already exists (and is executable), skip download. - if binary_path.exists() { - println!("cargo:rustc-env=CLICKHOUSE_BINARY={}", binary_path.display()); - return; - } - - // Download the full ClickHouse binary. 
- let url = download_url().expect("Failed to determine clickhouse download URL"); - println!("cargo:warning=Downloading ClickHouse binary from {url} (this may take a minute)..."); - - fs::create_dir_all(&ch_dir).expect("Failed to create clickhouse-local directory"); - - let timeout_secs: u64 = env::var("CARGO_HTTP_TIMEOUT") - .or_else(|_| env::var("HTTP_TIMEOUT")) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(300); // 5 minute timeout for ~160MB download - - let client = reqwest::blocking::Client::builder() - .timeout(std::time::Duration::from_secs(timeout_secs)) - .build() - .expect("Failed to create HTTP client"); - - let response = client - .get(&url) - .send() - .expect("Failed to download ClickHouse binary"); - - assert!( - response.status().is_success(), - "Failed to download ClickHouse binary: HTTP {}", - response.status() - ); - - let bytes = response - .bytes() - .expect("Failed to read ClickHouse binary response body"); - - // Write to a temporary file first, then rename (atomic on same filesystem). - let tmp_path = ch_dir.join("clickhouse.tmp"); - fs::write(&tmp_path, &bytes).expect("Failed to write ClickHouse binary"); - - // Make it executable (0o755). 
- let mut perms = fs::metadata(&tmp_path) - .expect("Failed to read tmp binary metadata") - .permissions(); - perms.set_mode(0o755); - fs::set_permissions(&tmp_path, perms).expect("Failed to set executable permissions"); - - fs::rename(&tmp_path, &binary_path).expect("Failed to rename ClickHouse binary into place"); + println!("cargo:rerun-if-env-changed=CLICKHOUSE_BINARY"); - println!("cargo:rustc-env=CLICKHOUSE_BINARY={}", binary_path.display()); - println!( - "cargo:warning=ClickHouse binary downloaded to {}", - binary_path.display() - ); + let binary = std::env::var("CLICKHOUSE_BINARY").unwrap_or_else(|_| "clickhouse".to_string()); + println!("cargo:rustc-env=CLICKHOUSE_BINARY={binary}"); } diff --git a/benchmarks/clickhouse-bench/src/lib.rs b/benchmarks/clickhouse-bench/src/lib.rs index 6f622b9c3a1..960675a251f 100644 --- a/benchmarks/clickhouse-bench/src/lib.rs +++ b/benchmarks/clickhouse-bench/src/lib.rs @@ -6,50 +6,13 @@ //! Uses `clickhouse-local` via `std::process::Command` to execute SQL queries //! against Parquet files on disk. //! -//! The ClickHouse binary is **automatically downloaded** at build time by `build.rs` -//! (similar to how `vortex-duckdb/build.rs` downloads the DuckDB dynamic library). -//! No manual installation is required. +//! The ClickHouse binary is resolved at build time via `build.rs`: +//! 1. `CLICKHOUSE_BINARY` env var — use the specified path. +//! 2. Falls back to `"clickhouse"` — resolved from `$PATH` at runtime. //! -//! ## Scan API Evaluation for ClickHouse Integration -//! -//! Per @gatesn's request in Discussion #6425, we evaluated whether the Vortex Scan API -//! (`vortex-scan/src/api.rs`) can support a good ClickHouse integration. -//! -//! ### Mapping -//! -//! The Scan API's four-layer abstraction maps naturally to ClickHouse: -//! -//! | Scan API | ClickHouse mapping | -//! |---|---| -//! | `DataSource` | Table metadata + connection config (`Send + Sync`, shareable) | -//! 
| `ScanRequest.projection` | `SELECT` column/expression pushdown (needs `Expression` → SQL converter) | -//! | `ScanRequest.filter` | `WHERE` clause pushdown (similar to `vortex-datafusion/convert/exprs.rs`) | -//! | `ScanRequest.limit` | `LIMIT N` pushdown (trivial) | -//! | `DataSourceScan` | Query planning + partition discovery (`system.parts` or file-level) | -//! | `Split` | Per-partition query execution unit | -//! | `Split::execute()` | Executes partition query, streams results as `SendableArrayStream` | -//! -//! ### Potential API Gaps -//! -//! 1. **No engine capability negotiation** — `DataSource` cannot declare which expression types -//! it supports for pushdown. Suggest adding `capabilities()` method. -//! 2. **`Split::execute()` is sync** — ClickHouse queries are inherently async (network I/O). -//! The pattern used by `LayoutReaderDataSource` (pre-compute `BoxFuture` in `scan()`) works -//! but should be documented as the recommended approach. -//! 3. **No column statistics API** — only `row_count_estimate()` exists. ClickHouse has rich -//! column stats (min/max/NDV) that could enable better query planning. -//! 4. **No transaction/snapshot semantics** — could lead to inconsistent reads across splits -//! on ClickHouse replicas. -//! -//! ### Conclusion -//! -//! The Scan API is a reasonable fit. None of the gaps are blockers. The recommended integration -//! order is: -//! 1. This PR: ClickBench baseline with `clickhouse-local` CLI (performance reference) -//! 2. `vortex-clickhouse` crate with type conversion (DType ↔ ClickHouse types) -//! 3. `ClickHouseDataSource` implementing `DataSource` trait (basic scan, no pushdown) -//! 4. Filter pushdown (`Expression` → ClickHouse WHERE clause) -//! 5. Projection pushdown and performance optimization +//! For local runs, install ClickHouse manually (e.g., `brew install clickhouse` +//! or download from ). +//! In CI, it is installed by the workflow before the benchmark step. 
use std::io::Write; use std::path::PathBuf; @@ -65,6 +28,9 @@ use vortex_bench::Benchmark; use vortex_bench::Format; /// Path to the ClickHouse binary, set by build.rs at compile time. +/// +/// This is either the value of the `CLICKHOUSE_BINARY` env var at build time, +/// or `"clickhouse"` (resolved from `$PATH` at runtime). const CLICKHOUSE_BINARY: &str = env!("CLICKHOUSE_BINARY"); /// A client that wraps `clickhouse-local` for running SQL benchmarks. @@ -77,18 +43,19 @@ pub struct ClickHouseClient { impl ClickHouseClient { /// Create a new client. Only Parquet format is supported. + /// + /// The ClickHouse binary is resolved from (in order): + /// 1. `CLICKHOUSE_BINARY` env var at build time + /// 2. `"clickhouse"` on `$PATH` pub fn new(benchmark: &dyn Benchmark, format: Format) -> Result { if format != Format::Parquet { anyhow::bail!("clickhouse-bench only supports Parquet format, got {format}"); } let binary = PathBuf::from(CLICKHOUSE_BINARY); - anyhow::ensure!( - binary.exists(), - "ClickHouse binary not found at '{}'. \ - This should have been downloaded by build.rs. Try `cargo clean -p clickhouse-bench`.", - binary.display() - ); + + // Verify the binary is usable (either absolute path exists, or resolvable via PATH). + Self::verify_binary(&binary)?; tracing::info!(binary = %binary.display(), "Using clickhouse-local"); @@ -100,6 +67,44 @@ impl ClickHouseClient { Ok(client) } + /// Check that the ClickHouse binary is available. + /// + /// For absolute paths, checks that the file exists on disk. + /// For bare names (e.g., `"clickhouse"`), tries to resolve via `$PATH` using `which`. + fn verify_binary(binary: &PathBuf) -> Result<()> { + if binary.is_absolute() { + anyhow::ensure!( + binary.exists(), + "ClickHouse binary not found at '{path}'. \ + Set CLICKHOUSE_BINARY env var to the correct path, or install ClickHouse \ + and ensure it is on $PATH.", + path = binary.display() + ); + } else { + // Try to find the binary on $PATH via `which`. 
+ let output = Command::new("which") + .arg(binary.as_os_str()) + .output() + .context("Failed to run `which` to locate clickhouse binary")?; + + anyhow::ensure!( + output.status.success(), + "ClickHouse binary '{name}' not found on $PATH. \ + Install ClickHouse (https://clickhouse.com/docs/en/install) or set \ + CLICKHOUSE_BINARY env var to an absolute path before building.", + name = binary.display() + ); + + let resolved = String::from_utf8_lossy(&output.stdout); + tracing::debug!( + resolved = resolved.trim(), + "Resolved clickhouse binary from PATH" + ); + } + + Ok(()) + } + /// Generate `CREATE VIEW ... AS SELECT * FROM file(...)` statements. /// /// We use a VIEW over the `file()` table function rather than `CREATE TABLE ... ENGINE = File()` @@ -172,8 +177,7 @@ impl ClickHouseClient { let time_instant = Instant::now(); - // The downloaded binary is the multi-tool `clickhouse` binary, - // so we always invoke it as `clickhouse local`. + // The `clickhouse` binary is a multi-tool; invoke it as `clickhouse local`. 
let mut child = Command::new(&self.binary) .args(["local", "--format", "TabSeparated"]) .stdin(Stdio::piped()) From 1bfefb08bb26680007f5bfb4e531b9f92074f8b0 Mon Sep 17 00:00:00 2001 From: fastio Date: Wed, 4 Mar 2026 10:38:59 +0800 Subject: [PATCH 4/7] bench(clickhouse): fix review issues and pin LTS version - Pass subcommand arg to clickhouse-bench in run-sql-bench.sh for consistency - Use BenchmarkArg + create_benchmark() in main.rs like other engines - Replace `which` with `clickhouse local --version` for binary verification - Pin ClickHouse to LTS release v25.8.18.1 from GitHub Releases Signed-off-by: fastio --- .github/scripts/run-sql-bench.sh | 2 +- .github/workflows/sql-benchmarks.yml | 9 +++-- benchmarks/clickhouse-bench/src/lib.rs | 45 +++++++++++++------------ benchmarks/clickhouse-bench/src/main.rs | 19 +++++------ 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/.github/scripts/run-sql-bench.sh b/.github/scripts/run-sql-bench.sh index 73d2a26d962..9fd91b0dd7f 100755 --- a/.github/scripts/run-sql-bench.sh +++ b/.github/scripts/run-sql-bench.sh @@ -132,7 +132,7 @@ fi # ClickHouse-bench only runs for local benchmarks (clickhouse-local reads local files). if ! 
$is_remote && [[ "$has_clickhouse" == "true" ]] && [[ -f "target/release_debug/clickhouse-bench" ]]; then # shellcheck disable=SC2086 - target/release_debug/clickhouse-bench \ + target/release_debug/clickhouse-bench "$subcommand" \ -d gh-json \ $opts \ -o ch-results.json diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 31175904275..b612f195a6c 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -129,10 +129,13 @@ jobs: - name: Install ClickHouse if: contains(matrix.targets, 'clickhouse:') + env: + CLICKHOUSE_VERSION: "25.8.18.1" run: | - curl https://clickhouse.com/ | sh - sudo ./clickhouse install - echo "CLICKHOUSE_BINARY=$(which clickhouse)" >> $GITHUB_ENV + wget -qO- "https://github.com/ClickHouse/ClickHouse/releases/download/v${CLICKHOUSE_VERSION}-lts/clickhouse-common-static-${CLICKHOUSE_VERSION}-amd64.tgz" | tar xz + cp clickhouse-common-static-${CLICKHOUSE_VERSION}/usr/bin/clickhouse . + chmod +x clickhouse + echo "CLICKHOUSE_BINARY=$PWD/clickhouse" >> $GITHUB_ENV - name: Build binaries shell: bash diff --git a/benchmarks/clickhouse-bench/src/lib.rs b/benchmarks/clickhouse-bench/src/lib.rs index 960675a251f..9327776578b 100644 --- a/benchmarks/clickhouse-bench/src/lib.rs +++ b/benchmarks/clickhouse-bench/src/lib.rs @@ -70,7 +70,7 @@ impl ClickHouseClient { /// Check that the ClickHouse binary is available. /// /// For absolute paths, checks that the file exists on disk. - /// For bare names (e.g., `"clickhouse"`), tries to resolve via `$PATH` using `which`. + /// For bare names (e.g., `"clickhouse"`), tries to invoke it to verify it's resolvable. fn verify_binary(binary: &PathBuf) -> Result<()> { if binary.is_absolute() { anyhow::ensure!( @@ -80,28 +80,31 @@ impl ClickHouseClient { and ensure it is on $PATH.", path = binary.display() ); - } else { - // Try to find the binary on $PATH via `which`. 
- let output = Command::new("which") - .arg(binary.as_os_str()) - .output() - .context("Failed to run `which` to locate clickhouse binary")?; - - anyhow::ensure!( - output.status.success(), - "ClickHouse binary '{name}' not found on $PATH. \ - Install ClickHouse (https://clickhouse.com/docs/en/install) or set \ - CLICKHOUSE_BINARY env var to an absolute path before building.", - name = binary.display() - ); - - let resolved = String::from_utf8_lossy(&output.stdout); - tracing::debug!( - resolved = resolved.trim(), - "Resolved clickhouse binary from PATH" - ); } + // Verify the binary is actually usable by running `clickhouse local --version`. + let output = Command::new(binary.as_os_str()) + .args(["local", "--version"]) + .output() + .with_context(|| { + format!( + "ClickHouse binary '{name}' not found on $PATH. \ + Install ClickHouse (https://clickhouse.com/docs/en/install) or set \ + CLICKHOUSE_BINARY env var to an absolute path before building.", + name = binary.display() + ) + })?; + + anyhow::ensure!( + output.status.success(), + "ClickHouse binary at '{name}' failed to run: {stderr}", + name = binary.display(), + stderr = String::from_utf8_lossy(&output.stderr) + ); + + let version = String::from_utf8_lossy(&output.stdout); + tracing::debug!(version = version.trim(), "Verified clickhouse binary"); + Ok(()) } diff --git a/benchmarks/clickhouse-bench/src/main.rs b/benchmarks/clickhouse-bench/src/main.rs index 03e8db27fd6..bd8e7a7666d 100644 --- a/benchmarks/clickhouse-bench/src/main.rs +++ b/benchmarks/clickhouse-bench/src/main.rs @@ -6,13 +6,12 @@ use std::path::PathBuf; use clap::Parser; use clickhouse_bench::ClickHouseClient; use tokio::runtime::Runtime; -use vortex_bench::Benchmark; +use vortex_bench::BenchmarkArg; use vortex_bench::Engine; use vortex_bench::Format; use vortex_bench::Opt; use vortex_bench::Opts; -use vortex_bench::clickbench::ClickBenchBenchmark; -use vortex_bench::clickbench::Flavor; +use vortex_bench::create_benchmark; use 
vortex_bench::create_output_writer; use vortex_bench::display::DisplayFormat; use vortex_bench::runner::SqlBenchmarkRunner; @@ -21,11 +20,14 @@ use vortex_bench::setup_logging_and_tracing; /// ClickHouse (clickhouse-local) benchmark runner. /// -/// Runs ClickBench queries against Parquet data using clickhouse-local as a performance baseline. +/// Runs queries against Parquet data using clickhouse-local as a performance baseline. /// This allows comparing ClickHouse's native Parquet reading performance against other engines /// (DuckDB, DataFusion) on the same hardware and dataset. #[derive(Parser)] struct Args { + #[arg(value_enum)] + benchmark: BenchmarkArg, + #[arg(short, long, default_value_t = 5)] iterations: usize, @@ -63,10 +65,7 @@ fn main() -> anyhow::Result<()> { setup_logging_and_tracing(args.verbose, args.tracing)?; - let flavor = opts.get_as::("flavor").unwrap_or_default(); - let remote_data_dir = opts.get_as::("remote-data-dir"); - let benchmark = - ClickBenchBenchmark::new(flavor, None, remote_data_dir)?.with_engine(Engine::ClickHouse); + let benchmark = create_benchmark(args.benchmark, &opts)?; let filtered_queries = filter_queries( benchmark.queries()?, @@ -83,7 +82,7 @@ fn main() -> anyhow::Result<()> { let formats = vec![Format::Parquet]; let mut runner = SqlBenchmarkRunner::new( - &benchmark, + benchmark.as_ref(), Engine::ClickHouse, formats, args.track_memory, @@ -93,7 +92,7 @@ fn main() -> anyhow::Result<()> { runner.run_all( &filtered_queries, args.iterations, - |format| ClickHouseClient::new(&benchmark, format), + |format| ClickHouseClient::new(benchmark.as_ref(), format), |ctx, _query_idx, _format, query| ctx.execute_query(query), )?; From 7fffcf56f713fb900fe36e0ab7e78c93432dac4c Mon Sep 17 00:00:00 2001 From: fastio Date: Tue, 10 Mar 2026 10:59:15 +0800 Subject: [PATCH 5/7] Remove the and helpers that were only needed for ClickHouse unquoted identifier handling. Queries are now returned as-is without dialect-specific transformation. 
Signed-off-by: fastio --- vortex-bench/src/clickbench/benchmark.rs | 85 +----------------------- 1 file changed, 1 insertion(+), 84 deletions(-) diff --git a/vortex-bench/src/clickbench/benchmark.rs b/vortex-bench/src/clickbench/benchmark.rs index cece28c6b6b..a0dcb4ea44f 100644 --- a/vortex-bench/src/clickbench/benchmark.rs +++ b/vortex-bench/src/clickbench/benchmark.rs @@ -10,7 +10,6 @@ use url::Url; use crate::Benchmark; use crate::BenchmarkDataset; -use crate::Engine; use crate::IdempotentPath; use crate::TableSpec; use crate::clickbench::*; @@ -20,8 +19,6 @@ pub struct ClickBenchBenchmark { pub flavor: Flavor, pub queries_file: Option, pub data_url: Url, - /// Override the engine to select engine-specific query files. - pub engine: Option, } impl ClickBenchBenchmark { @@ -35,16 +32,9 @@ impl ClickBenchBenchmark { flavor, queries_file, data_url: url, - engine: None, }) } - /// Set the engine to select engine-specific query files. - pub fn with_engine(mut self, engine: Engine) -> Self { - self.engine = Some(engine); - self - } - /// Returns the path to the queries file. fn queries_file_path(&self) -> PathBuf { if let Some(file) = &self.queries_file { @@ -54,21 +44,6 @@ impl ClickBenchBenchmark { manifest_dir.join("clickbench_queries.sql") } - /// Returns true if the engine requires unquoted column names. - fn uses_unquoted_identifiers(&self) -> bool { - matches!(self.engine, Some(Engine::ClickHouse)) - } - - /// Strips double quotes only from simple SQL identifiers for engines like - /// ClickHouse that don't require quoted column names. 
- fn normalize_query(&self, query: &str) -> String { - if !self.uses_unquoted_identifiers() { - return query.to_string(); - } - - strip_simple_identifier_quotes(query) - } - fn create_data_url(remote_data_dir: &Option, flavor: Flavor) -> Result { match remote_data_dir { None => { @@ -96,64 +71,6 @@ impl ClickBenchBenchmark { } } -fn strip_simple_identifier_quotes(query: &str) -> String { - let bytes = query.as_bytes(); - let mut out = String::with_capacity(query.len()); - let mut i = 0; - - while i < query.len() { - let rel = match query[i..].find('"') { - Some(pos) => pos, - None => { - out.push_str(&query[i..]); - break; - } - }; - - let start = i + rel; - out.push_str(&query[i..start]); - - let mut end = start + 1; - while end < bytes.len() { - if bytes[end] == b'"' { - if end + 1 < bytes.len() && bytes[end + 1] == b'"' { - end += 2; - } else { - break; - } - } else { - end += 1; - } - } - - if end >= bytes.len() { - out.push_str(&query[start..]); - break; - } - - let inner = &query[start + 1..end]; - if is_simple_identifier(inner) { - out.push_str(inner); - } else { - out.push_str(&query[start..=end]); - } - - i = end + 1; - } - - out -} - -fn is_simple_identifier(s: &str) -> bool { - let mut chars = s.chars(); - let Some(first) = chars.next() else { - return false; - }; - - (first.is_ascii_alphabetic() || first == '_') - && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') -} - #[async_trait::async_trait] impl Benchmark for ClickBenchBenchmark { fn queries(&self) -> Result> { @@ -163,7 +80,7 @@ impl Benchmark for ClickBenchBenchmark { .split(';') .map(|s| s.trim()) .filter(|s| !s.is_empty()) - .map(|s| self.normalize_query(s)) + .map(|s| s.to_string()) .enumerate() .collect()) } From 5aa201a2f7fcfe036374fc136e7c1e4aad1653e4 Mon Sep 17 00:00:00 2001 From: fastio Date: Tue, 10 Mar 2026 11:22:19 +0800 Subject: [PATCH 6/7] fix build errors Signed-off-by: fastio --- benchmarks/clickhouse-bench/src/main.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 
insertions(+), 2 deletions(-) diff --git a/benchmarks/clickhouse-bench/src/main.rs b/benchmarks/clickhouse-bench/src/main.rs index bd8e7a7666d..d705b23eb22 100644 --- a/benchmarks/clickhouse-bench/src/main.rs +++ b/benchmarks/clickhouse-bench/src/main.rs @@ -14,6 +14,8 @@ use vortex_bench::Opts; use vortex_bench::create_benchmark; use vortex_bench::create_output_writer; use vortex_bench::display::DisplayFormat; +use vortex_bench::runner::BenchmarkMode; +use vortex_bench::runner::BenchmarkQueryResult; use vortex_bench::runner::SqlBenchmarkRunner; use vortex_bench::runner::filter_queries; use vortex_bench::setup_logging_and_tracing; @@ -59,6 +61,20 @@ struct Args { options: Vec, } +struct ClickHouseQueryResult { + row_count: usize, +} + +impl BenchmarkQueryResult for ClickHouseQueryResult { + fn row_count(&self) -> usize { + self.row_count + } + + fn display(self) -> String { + format!("{} rows", self.row_count) + } +} + fn main() -> anyhow::Result<()> { let args = Args::parse(); let opts = Opts::from(args.options); @@ -91,9 +107,14 @@ fn main() -> anyhow::Result<()> { runner.run_all( &filtered_queries, - args.iterations, + BenchmarkMode::Run { + iterations: args.iterations, + }, |format| ClickHouseClient::new(benchmark.as_ref(), format), - |ctx, _query_idx, _format, query| ctx.execute_query(query), + |ctx, _query_idx, _format, query| { + let (row_count, duration) = ctx.execute_query(query)?; + Ok((duration, ClickHouseQueryResult { row_count })) + }, )?; let benchmark_id = format!("clickhouse-{}", benchmark.dataset_name()); From 14df8365e1b365213f78c3f2becbe1e4c8143292 Mon Sep 17 00:00:00 2001 From: fastio Date: Wed, 11 Mar 2026 11:39:23 +0800 Subject: [PATCH 7/7] update Cargo.lock Signed-off-by: fastio --- Cargo.lock | 50 ++------------------------------------------------ 1 file changed, 2 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc71dddbea7..82d021bd032 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3143,52 +3143,6 @@ dependencies = [ 
"sqlparser", ] -[[package]] -name = "datafusion-sqllogictest" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb859e97759dcbff66b484bdf4f251f9a76784d3dd7883c124de57510b1e1c2" -dependencies = [ - "arrow", - "async-trait", - "bigdecimal", - "clap", - "datafusion 52.2.0", - "datafusion-spark", - "datafusion-substrait", - "futures", - "half", - "indicatif", - "itertools 0.14.0", - "log", - "object_store", - "sqllogictest", - "sqlparser", - "tempfile", - "thiserror 2.0.18", - "tokio", -] - -[[package]] -name = "datafusion-substrait" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199790fd96e852997b30da4ff11109378c944841757d93875ea85fc69587ec91" -dependencies = [ - "async-recursion", - "async-trait", - "chrono", - "datafusion 52.2.0", - "half", - "itertools 0.14.0", - "object_store", - "pbjson-types", - "prost 0.14.3", - "substrait", - "tokio", - "url", -] - [[package]] name = "datafusion-sqllogictest" version = "52.1.0" @@ -3199,7 +3153,7 @@ dependencies = [ "async-trait", "bigdecimal", "clap", - "datafusion 52.1.0", + "datafusion 52.2.0", "datafusion-spark", "datafusion-substrait", "futures", @@ -3224,7 +3178,7 @@ dependencies = [ "async-recursion", "async-trait", "chrono", - "datafusion 52.1.0", + "datafusion 52.2.0", "half", "itertools 0.14.0", "object_store",