Merged
1 change: 1 addition & 0 deletions Cargo.lock


5 changes: 5 additions & 0 deletions oximeter/db/Cargo.toml
@@ -112,6 +112,7 @@ expectorate.workspace = true
itertools.workspace = true
omicron-test-utils.workspace = true
oximeter-test-utils.workspace = true
rand.workspace = true
slog-dtrace.workspace = true
sqlformat.workspace = true
sqlparser.workspace = true
@@ -150,3 +151,7 @@ doc = false
[[bench]]
name = "protocol"
harness = false

[[bench]]
name = "oxql"
harness = false
36 changes: 36 additions & 0 deletions oximeter/db/benches/README.md
@@ -0,0 +1,36 @@
# Oximeter benchmarks

## Field lookup

Filtering and pivoting OxQL field labels can take a significant fraction of overall query time, so we include a benchmark focused on field lookup. The benchmark queries all timeseries for a given table, filtering on a far-future timestamp so that measurement lookup is never exercised. Because field-lookup latency varies with the number of field tables that must be combined, the benchmark covers metrics that use varying numbers of field types. In the interest of benchmarking realistic queries, it doesn't generate synthetic data; instead, it provides scripts for the operator to back up real field data from a running rack and restore it into a test database.
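The per-timeseries query the benchmark issues has this shape (the timeseries name here is illustrative; the far-future timestamp matches no measurements, so only the field tables are read):

```
get virtual_machine:vcpu_usage | filter timestamp > @2200-01-01
```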

To fetch field data:

```bash
$ mkdir -p /tmp/oximeter-field-bench
$ oximeter/db/benches/backup_field_tables.sh /tmp/oximeter-field-bench [port]
```

To restore into a test database (take care not to restore into a real Oxide rack; for safety, the load script will fail if the destination database has nonzero rows):

```bash
$ oximeter/db/benches/load_field_tables.sh /tmp/oximeter-field-bench [port]
```

Then run the benchmark:

```bash
$ cargo bench --package oximeter-db --bench oxql -- --save-baseline main
```

To evaluate performance changes, run the benchmark using a new baseline:

```bash
$ cargo bench --package oximeter-db --bench oxql -- --save-baseline my-branch
```

Then compare with `critcmp`:

```bash
$ critcmp main my-branch
```
38 changes: 38 additions & 0 deletions oximeter/db/benches/backup_field_tables.sh
@@ -0,0 +1,38 @@
#!/bin/bash
#
# Dump ClickHouse field and schema tables to disk in native format. Run against
# a test rack with realistic oximeter data. Used to capture test data for
# benchmarking.
#
# Usage: ./backup_field_tables.sh <output_dir> [port]

set -euo pipefail

if [[ $# -lt 1 ]]; then
echo "Usage: $0 <output_dir> [port]" >&2
exit 1
fi

OUTPUT_DIR="$1"
PORT="${2:-9000}"
DATABASE="oximeter"

mkdir -p "$OUTPUT_DIR"

# Back up field tables.
#
# Note: Use SELECT rather than RESTORE because we may not have access to the
# remote ClickHouse's local disk, or have backups enabled at all.
for table in timeseries_schema fields_{bool,i8,i16,i32,i64,ipaddr,string,u8,u16,u32,u64,uuid}; do
count=$(clickhouse client --port "$PORT" \
--query "SELECT count() FROM $DATABASE.$table")
if [[ "$count" -eq 0 ]]; then
echo "No rows in table $DATABASE.$table; skipping"
continue
fi
output="$OUTPUT_DIR/${table}.native.gz"
echo "Backing up $DATABASE.$table ($count rows) to $output"
clickhouse client --port "$PORT" \
--query "SELECT * FROM $DATABASE.$table FORMAT Native" \
| gzip > "$output"
Comment on lines +34 to +37

Collaborator:

ClickHouse has specific syntax for creating a backup, including putting compression on it. If that doesn't work, I'd also recommend using SELECT ... INTO OUTFILE like this link. It's not a huge deal, but it does avoid the pipeline and redirect.
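A sketch of both alternatives mentioned here (table and destination names are illustrative; exact syntax depends on the ClickHouse version and whether a backup destination is configured server-side):

```sql
-- BACKUP writes server-side to a configured backup disk, with compression.
BACKUP TABLE oximeter.fields_string TO Disk('backups', 'fields_string.zip');

-- INTO OUTFILE writes client-side; compression is inferred from the extension,
-- avoiding the gzip pipeline and redirect.
SELECT * FROM oximeter.fields_string
INTO OUTFILE '/tmp/fields_string.native.gz'
FORMAT Native;
```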

Contributor Author:

I wound up doing this because (1) clickhouse on the rack doesn't enable backups (although I'm going to fix that in a minute) and (2) when I'm copying data from a real clickhouse to a benchmark instance, I don't have easy access to the local disk of the source clickhouse. Added comments about this.

done
51 changes: 51 additions & 0 deletions oximeter/db/benches/load_field_tables.sh
@@ -0,0 +1,51 @@
#!/bin/bash
#
# Load field table backups into a fresh ClickHouse for benchmarking.
# Fails if the destination database already contains data.
#
# Usage: ./load_field_tables.sh <input_dir> [port]

set -euo pipefail

if [[ $# -lt 1 ]]; then
echo "Usage: $0 <input_dir> [port]" >&2
exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SCHEMA_DIR="$SCRIPT_DIR/../schema/single-node"

INPUT_DIR="$1"
PORT="${2:-9000}"

DATABASE="oximeter"

# Error if database isn't empty.
echo "Checking for existing data..."
count=$(clickhouse client --port "$PORT" \
--query "SELECT ifNull(sum(total_rows), 0) FROM system.tables WHERE database = '$DATABASE'")

if [[ "$count" -gt 0 ]]; then
echo "Error: $DATABASE database already contains data ($count rows)"
echo "Refusing to initialize a non-empty database."
exit 1
fi

# Initialize schema.
echo "Initializing database schema..."
clickhouse client --port "$PORT" --multiquery < "$SCHEMA_DIR/db-init.sql"

# Load backups.
#
# Note: Use INSERT rather than RESTORE because we may not have access to the
# remote ClickHouse's local disk, or have backups enabled at all.
for table in timeseries_schema fields_{bool,i8,i16,i32,i64,ipaddr,string,u8,u16,u32,u64,uuid}; do
input="$INPUT_DIR/${table}.native.gz"
if [[ ! -f "$input" ]]; then
echo "No backup for table $table; skipping"
continue
fi
echo "Loading $table"
gunzip -c "$input" | clickhouse client --port "$PORT" \
--query "INSERT INTO $DATABASE.$table FORMAT Native"

Collaborator:

If we use the BACKUP statement above, we can use RESTORE here as well. See https://clickhouse.com/docs/operations/backup/disk#backup-and-restore-a-table.
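A sketch of the suggested counterpart to a server-side BACKUP (names illustrative; requires the same backup destination to be configured on the restoring server):

```sql
-- Restore a table from a previously written server-side backup.
RESTORE TABLE oximeter.fields_string FROM Disk('backups', 'fields_string.zip');
```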

done
184 changes: 184 additions & 0 deletions oximeter/db/benches/oxql.rs
@@ -0,0 +1,184 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Benchmark for OxQL query performance.
//!
//! Tests multiple timeseries with varying numbers of field types.

// Copyright 2026 Oxide Computer Company

use criterion::BenchmarkId;
use criterion::Criterion;
use criterion::{criterion_group, criterion_main};
use oximeter_db::Client;
use oximeter_db::native::Connection;
use oximeter_db::oxql::query::QueryAuthzScope;
use rand::seq::SliceRandom;
use std::net::IpAddr;
use std::net::SocketAddr;
use std::sync::Arc;
use uuid::Uuid;

const DEFAULT_CLICKHOUSE_PORT: u16 = 9000;

/// Timeseries to benchmark, spanning a range of field table counts.
const TIMESERIES_NAMES: &[&str] = &[
"crucible_upstairs:flush",
"ddm_session:advertisements_received",
"virtual_machine:vcpu_usage",
"bgp_session:active_connections_accepted",
"switch_data_link:bytes_sent",
];

/// Metadata about a timeseries, fetched from the database.
struct TimeseriesInfo {
name: String,
field_tables: u64,
cardinality: u64,
}

fn get_clickhouse_addr() -> IpAddr {
std::env::var("CLICKHOUSE_ADDRESS")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or_else(|| IpAddr::from([127, 0, 0, 1]))
}

fn get_clickhouse_port() -> u16 {
std::env::var("CLICKHOUSE_PORT")
.ok()
.and_then(|s| s.parse().ok())
.unwrap_or(DEFAULT_CLICKHOUSE_PORT)
}

fn get_socket_addr() -> SocketAddr {
SocketAddr::new(get_clickhouse_addr(), get_clickhouse_port())
}

fn get_client(rt: &tokio::runtime::Runtime) -> Arc<Client> {
let addr = get_socket_addr();
let log = slog::Logger::root(slog::Discard, slog::o!());

rt.block_on(async {
let client = Arc::new(Client::new(addr, &log));
client.ping().await.unwrap();
client
})
}

/// Fetch field table count and cardinality for each timeseries.
fn get_timeseries_info(rt: &tokio::runtime::Runtime) -> Vec<TimeseriesInfo> {
let names_list = TIMESERIES_NAMES
.iter()
.map(|name| format!("'{}'", name))
.collect::<Vec<_>>()
.join(", ");

let query = format!(
"SELECT
series.timeseries_name,
length(arrayDistinct(any(series.fields.type))) AS field_tables,
count(DISTINCT fields.timeseries_key) AS cardinality
FROM oximeter.timeseries_schema series
JOIN merge('oximeter', '^fields_') fields
ON series.timeseries_name = fields.timeseries_name
WHERE series.timeseries_name IN ({})
GROUP BY series.timeseries_name
ORDER BY field_tables, cardinality",
names_list
);

rt.block_on(async {
let mut conn = Connection::new(get_socket_addr()).await.unwrap();
let result = conn.query(Uuid::new_v4(), &query).await.unwrap();
let block = result.data.as_ref().expect("query returned no data");

let names = block
.column_values("timeseries_name")
.unwrap()
.as_string()
.unwrap();
let field_tables =
block.column_values("field_tables").unwrap().as_u64().unwrap();
let cardinalities =
block.column_values("cardinality").unwrap().as_u64().unwrap();

names
.iter()
.zip(field_tables.iter())
.zip(cardinalities.iter())
.map(|((name, &field_tables), &cardinality)| TimeseriesInfo {
name: name.clone(),
field_tables,
cardinality,
})
.collect()
})
}

// Benchmark field lookup. As of this writing, filtering and collating fields
// can make up a significant proportion of overall query time, and its latency
// varies with both the cardinality and the number of field tables that need to
// be combined for the relevant series. Query each timeseries in TIMESERIES_NAMES,
// filtering to a future timestamp so that we only benchmark the performance of
// field lookup, and ignore measurements. Note that the user is responsible for
// populating ClickHouse with test data.
fn oxql_field_lookup(c: &mut Criterion) {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
.unwrap();

let client = get_client(&rt);
let mut group = c.benchmark_group("oxql");

let mut timeseries_info = get_timeseries_info(&rt);
timeseries_info.shuffle(&mut rand::rng());

let max_cardinality =
timeseries_info.iter().map(|i| i.cardinality).max().unwrap_or(0);
let cardinality_width = max_cardinality.to_string().len();

for info in &timeseries_info {
// Use a far-future timestamp to benchmark field lookup only, with no
// measurements.
Comment on lines +144 to +145

Collaborator:

This is fine, I think, but we don't have to do it. The OxqlQueryResult type returns a Vec<OxqlQuerySummary>. Each of those corresponds to a single SQL query we run in the database. The first of those is the field query today. I don't really know that that's better, just an idea.

Also, looking at that would allow us to track the I/O usage (bytes and rows read), which would be useful to know. We don't take memory consumption now, but there's nothing preventing us from adding that functionality into the OxQL query engine.

Contributor Author:

I'm going to leave this for now, but I like the idea of adding more information to the benchmark. I don't see that criterion lets us add multiple custom metrics to a benchmark just now, but it would be interesting to record latency, io, cpu, etc., in the same benchmark.

let query =
format!("get {} | filter timestamp > @2200-01-01", info.name);

rt.block_on(client.oxql_query(&query, QueryAuthzScope::Fleet)).unwrap();

let bench_id = format!(
"{} tables/{:0width$} keys: {}",
info.field_tables,
info.cardinality,
info.name,
width = cardinality_width
);

group.bench_function(
BenchmarkId::new("field_lookup", &bench_id),
|bench| {
let client = client.clone();
let query = query.clone();
bench.to_async(&rt).iter(|| {
let client = client.clone();
let query = query.clone();
async move {
client.oxql_query(&query, QueryAuthzScope::Fleet).await
}
})
},
);
}

group.finish();
}

criterion_group!(
name = benches;
config = Criterion::default().sample_size(50).noise_threshold(0.05);
targets = oxql_field_lookup
);

criterion_main!(benches);