Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions .github/scripts/run-sql-bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors
#
# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench) for the given targets.
# This script is used by the sql-benchmarks.yml workflow.
# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench, clickhouse-bench)
# for the given targets. This script is used by the sql-benchmarks.yml workflow.
#
# Usage:
# run-sql-bench.sh <subcommand> <targets> [options]
#
# Arguments:
# subcommand The benchmark subcommand (e.g., tpch, clickbench, tpcds)
# targets Comma-separated list of engine:format pairs
# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet")
# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet,clickhouse:parquet")
#
# Options:
# --scale-factor <sf> Scale factor for the benchmark (e.g., 1.0, 10.0)
# --remote-storage <url> Remote storage URL (e.g., s3://bucket/path/)
# If provided, runs in remote mode (no lance support).
# If provided, runs in remote mode (no lance/clickhouse support).
# --benchmark-id <id> Benchmark ID for error messages (e.g., tpch-s3)

set -Eeu -o pipefail
Expand Down Expand Up @@ -78,6 +78,7 @@ fi
df_formats=$(echo "$targets" | tr ',' '\n' | (grep '^datafusion:' | grep -v ':lance$' || true) | sed 's/datafusion://' | tr '\n' ',' | sed 's/,$//')
ddb_formats=$(echo "$targets" | tr ',' '\n' | (grep '^duckdb:' || true) | sed 's/duckdb://' | tr '\n' ',' | sed 's/,$//')
has_lance=$(echo "$targets" | grep -q 'datafusion:lance' && echo "true" || echo "false")
# $targets is a single comma-separated line, so a bare '^clickhouse:' anchor would only match
# when a ClickHouse target happens to be listed first. Accept the engine prefix at the start of
# the list or directly after a comma so its position in the list does not matter.
has_clickhouse=$(echo "$targets" | grep -Eq '(^|,)clickhouse:' && echo "true" || echo "false")

# Build options string.
opts=""
Expand Down Expand Up @@ -127,3 +128,14 @@ if ! $is_remote && [[ "$has_lance" == "true" ]] && [[ -f "target/release_debug/l

cat lance-results.json >> results.json
fi

# ClickHouse-bench is limited to local benchmarks (clickhouse-local reads local files).
# It runs only when a clickhouse target was requested and the clickhouse-bench binary exists.
if [[ "$has_clickhouse" == "true" ]] && ! $is_remote; then
    if [[ -f "target/release_debug/clickhouse-bench" ]]; then
        # $opts is intentionally unquoted so it splits into separate arguments.
        # shellcheck disable=SC2086
        target/release_debug/clickhouse-bench "$subcommand" -d gh-json $opts -o ch-results.json

        # Append the ClickHouse results to the combined results file.
        cat ch-results.json >> results.json
    fi
fi
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
"id": "clickbench-nvme",
"subcommand": "clickbench",
"name": "Clickbench on NVME",
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet",
"build_lance": true
},
{
Expand Down
15 changes: 14 additions & 1 deletion .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ on:
"id": "clickbench-nvme",
"subcommand": "clickbench",
"name": "Clickbench on NVME",
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb"
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet"
},
{
"id": "tpch-nvme",
Expand Down Expand Up @@ -130,6 +130,16 @@ jobs:

- uses: ./.github/actions/system-info

- name: Install ClickHouse
  if: contains(matrix.targets, 'clickhouse:')
  # Request bash explicitly (as the "Build binaries" step does) so the script runs with
  # pipefail and a failed download fails the step instead of being masked by the pipe.
  shell: bash
  env:
    CLICKHOUSE_VERSION: "25.8.18.1"
  run: |
    # Download the pinned static release and unpack it into the working directory.
    wget -qO- "https://github.com/ClickHouse/ClickHouse/releases/download/v${CLICKHOUSE_VERSION}-lts/clickhouse-common-static-${CLICKHOUSE_VERSION}-amd64.tgz" | tar xz
    cp "clickhouse-common-static-${CLICKHOUSE_VERSION}/usr/bin/clickhouse" .
    chmod +x clickhouse
    # Export the binary location to later steps through the job environment file.
    echo "CLICKHOUSE_BINARY=$PWD/clickhouse" >> "$GITHUB_ENV"

- name: Build binaries
shell: bash
env:
Expand All @@ -139,6 +149,9 @@ jobs:
if [ "${{ matrix.build_lance }}" = "true" ]; then
packages="$packages --bin lance-bench"
fi
if echo "${{ matrix.targets }}" | grep -q 'clickhouse:'; then
packages="$packages --bin clickhouse-bench"
fi
cargo build $packages --profile release_debug

- name: Generate data
Expand Down
76 changes: 67 additions & 9 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ members = [
"encodings/zstd",
"encodings/bytebool",
# Benchmarks
"benchmarks/clickhouse-bench",
"benchmarks/lance-bench",
"benchmarks/compress-bench",
"benchmarks/datafusion-bench",
Expand Down
22 changes: 22 additions & 0 deletions benchmarks/clickhouse-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "clickhouse-bench"
description = "ClickHouse (clickhouse-local) benchmark runner for Vortex"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true
publish = false

[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
vortex-bench = { workspace = true }

[lints]
workspace = true
18 changes: 18 additions & 0 deletions benchmarks/clickhouse-bench/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Build script that exports the ClickHouse binary path.
//!
//! Resolution order:
//! 1. `CLICKHOUSE_BINARY` env var — use as-is.
//! 2. Falls back to `"clickhouse"` (i.e., resolve from `$PATH` at runtime).
//!
//! Users must install ClickHouse themselves for local runs.
//! In CI, it is installed via the workflow before the benchmark step.

/// Executable name used when no usable override is configured; per this script's resolution
/// order it is then resolved from `$PATH` at runtime.
const DEFAULT_CLICKHOUSE_BINARY: &str = "clickhouse";

/// Chooses the ClickHouse binary path to export to the crate.
///
/// A non-empty `configured` value is returned unchanged. A missing or empty value yields
/// [`DEFAULT_CLICKHOUSE_BINARY`], since an empty string cannot name an executable.
fn resolve_clickhouse_binary(configured: Option<String>) -> String {
    configured
        .filter(|path| !path.is_empty())
        .unwrap_or_else(|| DEFAULT_CLICKHOUSE_BINARY.to_owned())
}

/// Build-script entry point: exports the resolved ClickHouse binary path to the crate as the
/// compile-time `CLICKHOUSE_BINARY` environment variable.
fn main() {
    // Re-run the script whenever the override changes so the exported path does not go stale.
    println!("cargo:rerun-if-env-changed=CLICKHOUSE_BINARY");

    // `.ok()` maps both "unset" and "not valid Unicode" to `None`, i.e. the default name.
    let binary = resolve_clickhouse_binary(std::env::var("CLICKHOUSE_BINARY").ok());
    println!("cargo:rustc-env=CLICKHOUSE_BINARY={binary}");
}
Loading
Loading