From fe749725bf09ae7d7ece566e30fabdf96f7b8f40 Mon Sep 17 00:00:00 2001 From: Themis Skamagkis Date: Fri, 20 Mar 2026 15:47:44 +0100 Subject: [PATCH 1/3] [script] Renew the runBenchmarks_script.py --- scripts/runBenchmarks_script.py | 381 +++++++++++++++++++++++++------- 1 file changed, 302 insertions(+), 79 deletions(-) diff --git a/scripts/runBenchmarks_script.py b/scripts/runBenchmarks_script.py index 4ede289..bbb3ba1 100644 --- a/scripts/runBenchmarks_script.py +++ b/scripts/runBenchmarks_script.py @@ -1,79 +1,302 @@ -import os, re, subprocess, csv, argparse -from git import Repo -from statistics import mean - -# To run this script properly, you must set up your benchmarking subcases using git -# Naming is important: -# You must be on branch 'your_branch_name' - the script will not run your scene on this branch -# And must have named your benchmark branches as 'your_branch_name-test1', 'your_branch_name-test2' etc -# The script will check out these branches, run your scene and accumulate results - -# Edit to your system -parser = argparse.ArgumentParser(description="Run benchmarks for a scene") -parser.add_argument("-sofaExe", type=str, default="runSofa", help="Path to runSofa executable in your system") -parser.add_argument("-scene", type=str, help="Path to scene file you wish to run") -parser.add_argument("-iterations", type=int, default=100, help="Number of ODE solver iterations to perform") -parser.add_argument("-tests", type=int, default=3, help="Number of tests to run") -args = parser.parse_args() - -# get arguments -runSofa=args.sofaExe -# Scene name -xml_name = args.scene -# Runtime setup -n_iterations = args.iterations -# Number of tests to run for each case -n_tests = args.tests - -# Dictionary to store results -benchmarks = {} -results = {'time': [] , 'fps' : [], 'iterations' : n_iterations, 'git-branch' : ''} - -# Get git info to find branches -repo = Repo(search_parent_directories=True) -branch_prefix = repo.active_branch.name -benchmark_branches = [ - branch for branch in repo.branches if branch.name.startswith(branch_prefix + '-')] - -print(f'Running {xml_name} spawned from {branch_prefix} with {n_iterations} iterations') - -output_filename = 'log.performance.csv' -with open(output_filename, mode='w', newline='') as csv_file: - csv_file.write(branch_prefix + ', time [s], fps\n') - - for branch in benchmark_branches: - repo.git.checkout(branch.name) - git_tag = branch.name[len(branch_prefix + '-'):] - benchmarks[git_tag] = results - benchmarks[git_tag]['git-branch'] = branch.name - - for i in range(n_tests): - print(f'Git tag: {git_tag} - test {i+1}/{n_tests}') - - # This is the way to measure performance - output = subprocess.run([runSofa, "-g", "batch", "-n", str(n_iterations), xml_name], shell=False, capture_output=True, text=True) - for line in output.stdout.splitlines(): - if "iterations done in" in line: - numbers = re.findall(r"\d+\.\d+", line) - time_taken, fps = float(numbers[-2]), float(numbers[-1]) - benchmarks[git_tag]['time'].append(time_taken) - benchmarks[git_tag]['fps'].append(fps) - break - - ## This is to troubleshoot in case SOFA crashes and no message is available - #output = subprocess.Popen([runSofa, "-g", "batch", "-n", str(n_iterations), xml_name], shell=False, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) - #for line in output.stdout: - # print(line, end="") - # if "iterations done in" in line: - # numbers = re.findall(r"\d+\.\d+", line) - # time_taken, fps = numbers[-2], numbers[-1] - # scenarios[scenario]['time'].append(time_taken) - # scenarios[scenario]['fps'].append(fps) - # break - #output.wait() - - mean_time = mean(benchmarks[git_tag]['time']) - mean_fps = mean(benchmarks[git_tag]['fps'] ) - csv_file.write(f'{git_tag}, {mean_time}, {mean_fps}\n') - -repo.git.checkout(branch_prefix) +#!/usr/bin/env python3 +import os +import re +import sys +import json +import shutil +import argparse +import subprocess +import csv +from datetime import datetime +from statistics import mean, stdev +from dataclasses import dataclass, field +from typing import List, Optional, Dict, Any, Tuple + + +# ── Data types ───────────────────────────────────────────────────────────────── + +@dataclass +class RunResult: + time_s: float + fps: float + + +@dataclass +class CaseResults: + name: str + scene: str + results: List[RunResult] = field(default_factory=list) + failures: List[str] = field(default_factory=list) + + @property + def n_success(self) -> int: + return len(self.results) + + @property + def n_failures(self) -> int: + return len(self.failures) + + def stats(self) -> Optional[Dict[str, float]]: + if not self.results: + return None + times = [r.time_s for r in self.results] + fpss = [r.fps for r in self.results] + return { + 'mean_time': mean(times), + 'stddev_time': stdev(times) if len(times) > 1 else 0.0, + 'min_time': min(times), + 'max_time': max(times), + 'mean_fps': mean(fpss), + } + + +# ── Config ───────────────────────────────────────────────────────────────────── + +DEFAULTS: Dict[str, Any] = { + 'sofa_exe': 'runSofa', + 'warmup': 1, + 'output': 'log.benchmark', +} + +# Must be explicitly set in config or via CLI — no silent defaults. +REQUIRED_KEYS = ('iterations', 'n_tests', 'timeout') + +# All keys overrideable via CLI — must match build_parser arguments exactly. +OVERRIDE_KEYS = (*DEFAULTS.keys(), *REQUIRED_KEYS) + + +def load_config(config_path: str, overrides: Dict[str, Any]) -> Dict[str, Any]: + with open(config_path) as f: + config = json.load(f) + for key, val in DEFAULTS.items(): + config.setdefault(key, val) + for key, val in overrides.items(): + if val is not None: + config[key] = val + return config + + +def validate_config(config: Dict[str, Any]) -> List[str]: + errors = [] + + for key in REQUIRED_KEYS: + if key not in config: + errors.append(f"'{key}' is required but not set in config or CLI") + + sofa_exe = config.get('sofa_exe') + if not sofa_exe: + errors.append("'sofa_exe' is required but not set") + elif not shutil.which(sofa_exe): + errors.append(f"sofa_exe not found: '{sofa_exe}'") + + cases = config.get('cases') + if not cases: + errors.append("'cases' list is missing or empty") + return errors + + for i, case in enumerate(cases): + tag = f"Case '{case['name']}'" if 'name' in case else f"Case {i}" + if 'name' not in case: + errors.append(f"Case {i}: missing 'name'") + if 'scene' not in case: + errors.append(f"{tag}: missing 'scene'") + elif not os.path.isfile(case['scene']): + errors.append(f"{tag}: scene file not found: '{case['scene']}'") + + for key in REQUIRED_KEYS: + val = config.get(key) + if val is not None and (not isinstance(val, int) or val < 1): + errors.append(f"'{key}' must be a positive integer, got: {val!r}") + warmup = config.get('warmup') + if warmup is not None and (not isinstance(warmup, int) or warmup < 0): + errors.append(f"'warmup' must be a non-negative integer, got: {warmup!r}") + + return errors + + +# ── Runner ───────────────────────────────────────────────────────────────────── + +_TIMING_RE = re.compile(r'(\d+(?:\.\d+)?)\s+s\s*\(\s*(\d+(?:\.\d+)?)\s*FPS\s*\)', re.IGNORECASE) +_NUMBER_RE = re.compile(r'\d+\.\d+') + + +def _parse_timing_line(line: str) -> Optional[RunResult]: + m = _TIMING_RE.search(line) + if m: + return RunResult(float(m.group(1)), float(m.group(2))) + # Fallback: last two decimal numbers in the line + numbers = _NUMBER_RE.findall(line) + if len(numbers) >= 2: + return RunResult(float(numbers[-2]), float(numbers[-1])) + return None + + +def run_single( + sofa_exe: str, scene: str, iterations: int, timeout: int +) -> Tuple[Optional[RunResult], Optional[str]]: + """Returns (RunResult, None) on success, (None, error_message) on failure. Never raises.""" + try: + proc = subprocess.run( + [sofa_exe, '-g', 'batch', '-n', str(iterations), scene], + capture_output=True, + text=True, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + return None, f'Timeout after {timeout}s' + except Exception as e: + return None, f'Failed to launch: {e}' + + for line in proc.stdout.splitlines(): + if 'iterations done in' in line: + result = _parse_timing_line(line) + if result: + return result, None + return None, f'Found timing line but could not parse it: {line!r}' + + error_detail = f'exit={proc.returncode}, no timing line in output' + last_stderr = proc.stderr.strip().rsplit('\n', 1)[-1][:200] + if last_stderr: + error_detail += f' | stderr: {last_stderr}' + return None, error_detail + + +def run_case(config: Dict[str, Any], case: Dict[str, str]) -> CaseResults: + sofa_exe = config['sofa_exe'] + iterations = config['iterations'] + timeout = config['timeout'] + n_tests = config['n_tests'] + warmup = config['warmup'] + scene = case['scene'] + name = case['name'] + + cr = CaseResults(name=name, scene=scene) + + for i in range(warmup + n_tests): + is_warmup = i < warmup + label = f'warmup {i + 1}/{warmup}' if is_warmup else f'test {i - warmup + 1}/{n_tests}' + print(f' [{name}] {label} ... ', end='', flush=True) + + result, error = run_single(sofa_exe, scene, iterations, timeout) + + if result is not None: + suffix = ' [warmup, discarded]' if is_warmup else '' + print(f'{result.time_s:.3f}s ({result.fps:.1f} FPS){suffix}') + if not is_warmup: + cr.results.append(result) + else: + suffix = ' [warmup]' if is_warmup else '' + print(f'FAILED{suffix}: {error}') + if not is_warmup: + cr.failures.append(error) + + return cr + + +# ── Reporting ────────────────────────────────────────────────────────────────── + +def print_table(case_results: List[CaseResults], all_stats: List[Optional[Dict]]) -> None: + col = 24 + header = ( + f'{"Case":<{col}} {"Ok":>4} {"Fail":>4}' + f' {"Mean (s)":>9} {"Std":>7} {"Min":>7} {"Max":>7} {"FPS":>7}' + ) + width = len(header) + print() + print('=' * width) + print(header) + print('-' * width) + for cr, s in zip(case_results, all_stats): + name = cr.name[:col] + if s: + print( + f'{name:<{col}} {cr.n_success:>4} {cr.n_failures:>4}' + f' {s["mean_time"]:>9.3f} {s["stddev_time"]:>7.3f}' + f' {s["min_time"]:>7.3f} {s["max_time"]:>7.3f}' + f' {s["mean_fps"]:>7.1f}' + ) + else: + print(f'{name:<{col}} {0:>4} {cr.n_failures:>4} ALL FAILED') + print('=' * width) + + +def write_csv(output_prefix: str, case_results: List[CaseResults], all_stats: List[Optional[Dict]]) -> str: + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + path = f'{output_prefix}.{timestamp}.csv' + with open(path, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow([ + 'name', 'scene', 'n_success', 'n_failures', + 'mean_time_s', 'stddev_time_s', 'min_time_s', 'max_time_s', 'mean_fps', + ]) + for cr, s in zip(case_results, all_stats): + if s: + writer.writerow([ + cr.name, cr.scene, cr.n_success, cr.n_failures, + s['mean_time'], s['stddev_time'], s['min_time'], s['max_time'], s['mean_fps'], + ]) + else: + writer.writerow([cr.name, cr.scene, 0, cr.n_failures, '', '', '', '', '']) + return path + + +# ── CLI ──────────────────────────────────────────────────────────────────────── + +def build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser( + description='Benchmark SOFA scenes', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + p.add_argument('-config', required=True, help='Path to JSON config file') + p.add_argument('-sofa_exe', default=None, help='Override: path to runSofa executable') + p.add_argument('-iterations', default=None, type=int, help='Override: ODE iterations per run') + p.add_argument('-n_tests', default=None, type=int, help='Override: timed test runs per case') + p.add_argument('-warmup', default=None, type=int, help='Override: warmup runs (discarded from stats)') + p.add_argument('-timeout', default=None, type=int, help='Override: per-run timeout in seconds') + p.add_argument('-output', default=None, help='Override: output CSV filename prefix') + return p + + +def main() -> None: + args = build_parser().parse_args() + + overrides = {k: getattr(args, k) for k in OVERRIDE_KEYS} + + try: + config = load_config(args.config, overrides) + except FileNotFoundError: + print(f'Error: config file not found: {args.config}', file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f'Error: invalid JSON in config: {e}', file=sys.stderr) + sys.exit(1) + + errors = validate_config(config) + if errors: + print('Config errors:', file=sys.stderr) + for e in errors: + print(f' - {e}', file=sys.stderr) + sys.exit(1) + + print( + f'Benchmark: {len(config["cases"])} case(s), ' + f'{config["n_tests"]} tests + {config["warmup"]} warmup, ' + f'{config["iterations"]} iterations, ' + f'timeout={config["timeout"]}s' + ) + + case_results: List[CaseResults] = [] + for case in config['cases']: + print(f'\nCase: {case["name"]} -> {case["scene"]}') + case_results.append(run_case(config, case)) + + all_stats = [cr.stats() for cr in case_results] + print_table(case_results, all_stats) + + csv_path = write_csv(config['output'], case_results, all_stats) + print(f'\nResults written to: {csv_path}') + + +if __name__ == '__main__': + main() From 6d599243d97501cd60bf867d8d8a06e30d8c8aa2 Mon Sep 17 00:00:00 2001 From: Themis Skamagkis Date: Fri, 20 Mar 2026 16:09:56 +0100 Subject: [PATCH 2/3] [doc] Document script usage --- doc/Scripts.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 doc/Scripts.md diff --git a/doc/Scripts.md b/doc/Scripts.md new file mode 100644 index 0000000..b450df7 --- /dev/null +++ b/doc/Scripts.md @@ -0,0 +1,41 @@ +# Scripts Folder Contents + +The Scripts folder has a collection of python scripts to provide basic utilities. + +## runBenchmarks_script.py + +""" +SOFA Scene Benchmarking Tool + +Usage: + python benchmark.py -config benchmark.json + python benchmark.py -config benchmark.json -n_tests 10 -timeout 180 + +Config file format (JSON): + { + "sofa_exe": "runSofa", + "iterations": 110, + "n_tests": 5, + "warmup": 2, + "timeout": 120, + "output": "log.benchmark", + "cases": [ + { "name": "baseline", "scene": "scene_baseline.scn" }, + { "name": "refactored", "scene": "scene_refactored.scn" } + ] + } + + - `sofa_exe`: command to run (to choose SOFA version). + - Default: `runSofa` + - `iterations`: # of time steps to run in batch mode. + - `n_tests`: # of tests to run. Mean & std of FPS are computed. + - `warmup`: # of times to do a dry run to avoid false reports due to caching. + - Default: 2 + - `timeout`: # seconds to run before killing process. For stale runs that crashed. + - `output`: Name of the file to report results. + - Default: "log.benchmark" + - `cases`: Which scene files to use + + +All top-level config keys can be overridden individually via CLI arguments. +""" From db6461898ce4de0565e4503e8025b0fcad5f8eb3 Mon Sep 17 00:00:00 2001 From: Themis Skamagkis <70031729+th-skam@users.noreply.github.com> Date: Fri, 20 Mar 2026 16:21:49 +0100 Subject: [PATCH 3/3] Fix formatting in Scripts.md for benchmark usage Updated documentation for runBenchmarks_script.py usage. --- doc/Scripts.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/Scripts.md b/doc/Scripts.md index b450df7..c6602c9 100644 --- a/doc/Scripts.md +++ b/doc/Scripts.md @@ -4,7 +4,7 @@ The Scripts folder has a collection of python scripts to provide basic utilities ## runBenchmarks_script.py -""" +``` SOFA Scene Benchmarking Tool Usage: @@ -38,4 +38,4 @@ Config file format (JSON): All top-level config keys can be overridden individually via CLI arguments. -""" +```