Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
238 changes: 238 additions & 0 deletions benchmarking/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
#!/usr/bin/env python3.13
"""
Tesseract Decoder Benchmarker

This script automates the process of benchmarking the Tesseract decoder using hyperfine.
It compares the performance of your current working directory against a baseline revision.

Basic Usage:
Run the benchmarker with default settings (compares current directory against 'main'):
$ ./benchmarking/benchmark.py

Run a quick benchmark (minimal shots and runs, useful for sanity checking before a long run):
$ ./benchmarking/benchmark.py -q

Compare against a specific baseline revision (e.g., a specific commit or branch):
$ ./benchmarking/benchmark.py -b my-feature-branch

Filter circuits by group name (e.g., only run 'surface_code' circuits) See circuits.json for available groups:
$ ./benchmarking/benchmark.py -g surface_code

Benchmarking Multiple Changes:
You can benchmark multiple working directories simultaneously against the baseline.
This is useful if you have several different implementations across different
directories that you want to compare side-by-side in a single run.

To set up additional directories for your changes:
- Using git: Create a new worktree.
$ git worktree add ../path-to-experiment1 <branch-or-commit>
- Using jj (jujutsu): Add a new workspace.
$ jj workspace add ../path-to-experiment1 -r <revision>

Use the -d or --dir flag for each additional directory you want to include:
$ ./benchmarking/benchmark.py -d ../path-to-experiment1 -d ../path-to-experiment2

You can also provide a label for the plot by using the format label=path:
$ ./benchmarking/benchmark.py -d "experiment1=../path-to-experiment1"

This will benchmark the baseline, the current working directory, and the two
extra directories specified, providing a single cohesive report.

Command Line Flags:
-b, --baseline <rev> : Specify baseline revision (default: main). Can be a branch or commit.
-d, --dir <lbl=path> : Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.
-q, --quick : Enable quick mode (fewer shots, warmup rounds, and runs). Useful for testing.
-g, --group <name> : Filter circuits to benchmark by group name (e.g. 'surface_code').
--skip-build : Skip the bazel build step (assuming binaries are already built).
--loop : Continuously loop the benchmarks. Take a step away from your computer, and grab a Nuka Cola.
--shots <num> : Override the default sample-num-shots (default: 5000). Mutually exclusive with -q.
--warmup <num> : Override the default warmup-rounds (default: 15). Mutually exclusive with -q.
--runs <num> : Override the default num-runs (default: 50). Mutually exclusive with -q.
"""

import argparse
import contextlib
import json
import logging
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from zoneinfo import ZoneInfo
import plotting
import workspace

# Configure logging with LA timezone
class Formatter(logging.Formatter):
def converter(self, timestamp):
dt = datetime.fromtimestamp(timestamp, tz=ZoneInfo('America/Los_Angeles'))
return dt.timetuple()

logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
formatter = Formatter('[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)

def print_batch_summary(json_output_files: list[Path], circuit_names: list[str]) -> None:
logger.info("===================================================")
logger.info(">>> BATCH RUN SUMMARY")
logger.info("===================================================")

for json_file, c_name in zip(json_output_files, circuit_names):
if Path(json_file).exists():
try:
with open(json_file, 'r') as f:
results_data = json.load(f)

results_list = results_data.get('results', [])
if len(results_list) >= 2:
baseline_mean = results_list[0].get('mean')
pwd_mean = results_list[1].get('mean')

if baseline_mean is not None and pwd_mean is not None and pwd_mean > 0:
speedup = baseline_mean / pwd_mean
logger.info(f"Circuit: {c_name}")
logger.info(f" Baseline Mean: {baseline_mean:.4f} s")
logger.info(f" PWD Mean: {pwd_mean:.4f} s")
logger.info(f" Speedup: {speedup:.4f}x")
logger.info("---------------------------------------------------")
except Exception as e:
logger.error(f"Failed to parse or summarize {json_file}: {e}")

def run_benchmark_batch(args: argparse.Namespace, workspaces: list[str | Path], workspace_names: list[str]) -> None:
logger.info("===================================================")
logger.info(">>> STARTING NEW BATCH RUN SEQUENCE")
logger.info("===================================================")


if args.quick:
logger.info(f">>> Quick mode enabled: Reduced shots ({args.sample_num_shots}), warmup ({args.warmup_rounds}), and runs ({args.num_runs}).")

la_tz = ZoneInfo('America/Los_Angeles')
timestamp = datetime.now(la_tz).strftime('%Y-%m-%d_%H_%M')
result_dir = Path(f"benchmarking/results/{timestamp}_{args.num_runs}")

logger.info(f">>> Output directory: {result_dir}")
(result_dir / "benchmark_json").mkdir(parents=True, exist_ok=True)
(result_dir / "benchmark_whiskers").mkdir(parents=True, exist_ok=True)

try:
with open("benchmarking/circuits.json", 'r') as f:
circuits_data = json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
logger.error(f"Failed to load circuits JSON: {e}")
sys.exit(1)

if args.group:
logger.info(f">>> Filtering circuits by group: {args.group}")
circuits = [c for c in circuits_data if c.get('group') == args.group]
else:
circuits = circuits_data

json_output_files = []
circuit_names = []

tesseract_args = [
"--sample-num-shots", str(args.sample_num_shots),
"--print-stats", "--threads", "48", "--beam", "5",
"--no-revisit-dets", "--num-det-orders", "1",
"--pqlimit", "100000", "--sample-seed", "123456"
]

for circuit in circuits:
c_name = circuit['name']
c_path = circuit['path']

json_file = result_dir / "benchmark_json" / f"results_{c_name}.json"
whisker_file = result_dir / "benchmark_whiskers" / f"results_{c_name}.png"

json_output_files.append(json_file)
circuit_names.append(c_name)

logger.info("---------------------------------------------------")
logger.info(f">>> BENCHMARKING CIRCUIT: {c_name}")
logger.info(f">>> Path: {c_path}")

hyperfine_cmd = [
"hyperfine",
"--warmup", str(args.warmup_rounds),
"--runs", str(args.num_runs),
"--export-json", str(json_file)
]

for name, d in zip(workspace_names, workspaces):
hyperfine_cmd.extend(["-n", name])

binary_path = Path(d) / "bazel-bin" / "src" / "tesseract"
if str(d) == ".":
binary_path = Path("bazel-bin") / "src" / "tesseract"

cmd_for_binary = f"{binary_path} --circuit '{c_path}' " + " ".join(tesseract_args)
hyperfine_cmd.append(cmd_for_binary)

workspace.run_cmd(hyperfine_cmd)

plotting.plot_benchmark_results(json_file=str(json_file), labels=workspace_names, output_file=str(whisker_file))

print_batch_summary(json_output_files, circuit_names)
logger.info(f">>> Batch Run Complete! Results saved in: {result_dir}")

def main() -> None:
parser = argparse.ArgumentParser(description="Benchmark tesseract decoder using hyperfine.")
parser.add_argument("-b", "--baseline", default="main", help="Specify baseline revision (default: main)")
parser.add_argument("-d", "--dir", action="append", default=[], help="Add extra working directories to benchmark against. Format: path or label=path. Can be specified multiple times.")
parser.add_argument("--skip-build", action="store_true", help="Skip the bazel build step")
parser.add_argument("--loop", action="store_true", help="Loop runs rather than running once.")

parser.add_argument("-q", "--quick", action="store_true", help="Enable quick mode (fewer shots/runs)")
parser.add_argument("-g", "--group", default="", help="Filter circuits by group name")
parser.add_argument("--shots", type=int, default=5000, help="Override the default sample-num-shots (mutually exclusive with -q)")
parser.add_argument("--warmup", type=int, default=15, help="Override the default warmup-rounds (mutually exclusive with -q)")
parser.add_argument("--runs", type=int, default=50, help="Override the default num-runs (mutually exclusive with -q)")

args = parser.parse_args()

if args.quick and (args.shots != 5000 or args.warmup != 15 or args.runs != 50):
parser.error("-q/--quick cannot be used with --shots, --warmup, or --runs")

args.sample_num_shots = 500 if args.quick else args.shots
args.warmup_rounds = 1 if args.quick else args.warmup
args.num_runs = 2 if args.quick else args.runs

baseline_dir = "../baseline_bench_tmp"
vcs = workspace.check_vcs()
if not vcs:
logger.error("Error: Neither a jj nor git repository detected.")
sys.exit(1)
with workspace.managed_baseline(baseline_dir, args.baseline, vcs):
extra_workspaces = []
extra_names = []
for d in args.dir:
if '=' in d:
lbl, pth = d.split('=', 1)
extra_names.append(lbl)
extra_workspaces.append(pth)
else:
extra_names.append(Path(d).name)
extra_workspaces.append(d)

workspaces = [baseline_dir, "."] + extra_workspaces
workspace_names = ["baseline", "pwd"] + extra_names

workspace.build_all(workspaces, args.skip_build)

if args.loop:
while True:
run_benchmark_batch(args, workspaces, workspace_names)
logger.info(">>> Restarting in 5 seconds... (Press Ctrl+C to stop)")
time.sleep(5)
workspace.build_all(workspaces, args.skip_build)
else:
run_benchmark_batch(args, workspaces, workspace_names)

if __name__ == "__main__":
main()
67 changes: 67 additions & 0 deletions benchmarking/circuits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
[
{
"name": "r12_d12_p0.001_bivariate_bicycle_X",
"group": "bivariate_bicycle_X",
"path": "benchmarking/testsuite/r=12,d=12,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
},
{
"name": "r12_d12_p0.002_bivariate_bicycle_X",
"group": "bivariate_bicycle_X",
"path": "benchmarking/testsuite/r=12,d=12,p=0.002,noise=si1000,c=bivariate_bicycle_X,nkd=[[144,12,12]],q=288,iscolored=True,A_poly=x^3+y+y^2,B_poly=y^3+x+x^2.stim"
},
{
"name": "r18_d18_p0.001_bivariate_bicycle_X",
"group": "bivariate_bicycle_X",
"path": "benchmarking/testsuite/r=18,d=18,p=0.001,noise=si1000,c=bivariate_bicycle_X,nkd=[[288,12,18]],q=576,iscolored=True,A_poly=x^3+y^2+y^7,B_poly=y^3+x+x^2.stim"
},
{
"name": "r11_d11_p0.001_superdense_color_code_X",
"group": "superdense_color_code_X",
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
},
{
"name": "r11_d11_p0.002_superdense_color_code_X",
"group": "superdense_color_code_X",
"path": "benchmarking/testsuite/r=11,d=11,p=0.002,noise=si1000,c=superdense_color_code_X,q=181,gates=cz.stim"
},
{
"name": "r9_d9_p0.001_superdense_color_code_X",
"group": "superdense_color_code_X",
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
},
{
"name": "r9_d9_p0.002_superdense_color_code_X",
"group": "superdense_color_code_X",
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=superdense_color_code_X,q=121,gates=cz.stim"
},
{
"name": "r23_d23_p0.001_surface_code",
"group": "surface_code",
"path": "benchmarking/testsuite/r=23,d=23,p=0.001,noise=uniform,c=surface_code,q=2025,gates=cx.stim"
},
{
"name": "r23_d23_p0.008_surface_code",
"group": "surface_code",
"path": "benchmarking/testsuite/r=23,d=23,p=0.008,noise=uniform,c=surface_code,q=2025,gates=cx.stim"
},
{
"name": "r29_d29_p0.001_surface_code",
"group": "surface_code",
"path": "benchmarking/testsuite/r=29,d=29,p=0.001,noise=uniform,c=surface_code,q=3249,gates=cx.stim"
},
{
"name": "r11_d11_p0.001_surface_code_trans_cx_X",
"group": "surface_code_trans_cx_X",
"path": "benchmarking/testsuite/r=11,d=11,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=482,gates=cz.stim"
},
{
"name": "r9_d9_p0.001_surface_code_trans_cx_X",
"group": "surface_code_trans_cx_X",
"path": "benchmarking/testsuite/r=9,d=9,p=0.001,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
},
{
"name": "r9_d9_p0.002_surface_code_trans_cx_X",
"group": "surface_code_trans_cx_X",
"path": "benchmarking/testsuite/r=9,d=9,p=0.002,noise=si1000,c=surface_code_trans_cx_X,q=322,gates=cz.stim"
}
]
44 changes: 44 additions & 0 deletions benchmarking/plotting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import json
import matplotlib.pyplot as plt

def plot_benchmark_results(
json_file: str,
labels: list[str],
output_file: str | None = None,
title: str | None = None,
) -> None:
"""
Plots benchmark results from a JSON file generated by hyperfine.

Args:
json_file: Path to the JSON file with benchmark results.
labels: List of labels for the plot legend.
output_file: Optional path to save the generated image. If None, the plot is shown instead.
title: Optional title for the plot.
"""
with open(json_file, encoding="utf-8") as f:
results = json.load(f)["results"]

times = [b["times"] for b in results]

plt.figure(figsize=(10, 6), constrained_layout=True)
boxplot = plt.boxplot(times, vert=True, patch_artist=True)
cmap = plt.get_cmap("rainbow")
colors = [cmap(val / len(times)) for val in range(len(times))]

for patch, color in zip(boxplot["boxes"], colors):
patch.set_facecolor(color)

if title:
plt.title(title)

plt.legend(handles=boxplot["boxes"], labels=labels, loc="best", fontsize="medium")
plt.ylabel("Time [s]")
plt.ylim(0, None)
plt.xticks(list(range(1, len(labels) + 1)), labels, rotation=45)

if output_file:
plt.savefig(output_file)
else:
plt.show()
plt.close()
Loading