diff --git a/benchmarks/pandas/bench_at_iat.py b/benchmarks/pandas/bench_at_iat.py
new file mode 100644
index 00000000..662c5e43
--- /dev/null
+++ b/benchmarks/pandas/bench_at_iat.py
@@ -0,0 +1,37 @@
+"""Benchmark: Series.at, Series.iat, DataFrame.at, DataFrame.iat — fast scalar access"""
+import json
+import time
+import pandas as pd
+
+N = 100_000  # rows in both the Series and the DataFrame
+WARMUP = 3  # untimed passes executed before the clock starts
+ITERATIONS = 10  # timed passes; each pass performs four scalar lookups
+
+labels = [f"r{i}" for i in range(N)]  # string index labels "r0", "r1", ...
+values = [i * 1.5 for i in range(N)]
+
+s = pd.Series(values, index=labels)
+df = pd.DataFrame({"a": values, "b": [v * 2 for v in values]}, index=labels)
+
+mid_label = f"r{N // 2}"  # label of the row that sits at position N // 2
+
+for _ in range(WARMUP):  # warm-up: same four accesses as the timed loop
+ _ = s.at[mid_label]  # label-based scalar read
+ _ = s.iat[N // 2]  # position-based scalar read
+ _ = df.at[mid_label, "a"]
+ _ = df.iat[N // 2, 0]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ _ = s.at[mid_label]
+ _ = s.iat[N // 2]
+ _ = df.at[mid_label, "a"]
+ _ = df.iat[N // 2, 0]
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({  # a single JSON object on stdout
+ "function": "at_iat",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four lookups), not per lookup
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_convert_dtypes.py b/benchmarks/pandas/bench_convert_dtypes.py
new file mode 100644
index 00000000..543fa870
--- /dev/null
+++ b/benchmarks/pandas/bench_convert_dtypes.py
@@ -0,0 +1,50 @@
+"""
+Benchmark: pandas Series.convert_dtypes() and DataFrame.convert_dtypes()
+
+Creates a 50k-row dataset with object-dtype numeric, boolean, and string
+columns, then measures how fast pandas can infer and convert to best dtypes.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+N = 50_000  # rows per column
+WARMUP = 3  # untimed passes
+ITERATIONS = 20  # timed passes; each converts two Series and one DataFrame
+
+# Python lists with periodic None holes (same structure as the TypeScript version)
+int_data = [None if i % 17 == 0 else i for i in range(N)]
+float_data = [None if i % 13 == 0 else i * 1.5 for i in range(N)]
+str_data = [None if i % 11 == 0 else f"str_{i}" for i in range(N)]
+bool_data = [None if i % 7 == 0 else (i % 2 == 0) for i in range(N)]
+
+int_series = pd.Series(int_data, dtype=object)  # object dtype is requested explicitly
+float_series = pd.Series(float_data, dtype=object)
+
+df = pd.DataFrame({  # no dtype is passed here; column dtypes are left to the constructor
+ "int_col": int_data,
+ "float_col": float_data,
+ "str_col": str_data,
+ "bool_col": bool_data,
+})
+
+# Warm-up: same three conversions as the timed loop, results discarded
+for _ in range(WARMUP):
+ int_series.convert_dtypes()
+ float_series.convert_dtypes()
+ df.convert_dtypes()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ int_series.convert_dtypes()
+ float_series.convert_dtypes()
+ df.convert_dtypes()
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "convert_dtypes",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (all three conversions)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_cross_join.py b/benchmarks/pandas/bench_cross_join.py
new file mode 100644
index 00000000..ad1de45b
--- /dev/null
+++ b/benchmarks/pandas/bench_cross_join.py
@@ -0,0 +1,32 @@
+"""Benchmark: cross_join — Cartesian product of two 300-row DataFrames (90k result rows)"""
+import json
+import time
+import pandas as pd
+
+N = 300  # rows per input frame; the cross product has N * N rows
+WARMUP = 3  # untimed merges
+ITERATIONS = 10  # timed merges
+
+left = pd.DataFrame({
+ "id_a": list(range(N)),
+ "val_a": [i * 1.5 for i in range(N)],
+})
+right = pd.DataFrame({  # column names are disjoint from `left`
+ "id_b": list(range(N)),
+ "val_b": [i * 2.5 for i in range(N)],
+})
+
+for _ in range(WARMUP):  # warm-up, result discarded
+ pd.merge(left, right, how="cross")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.merge(left, right, how="cross")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "cross_join",
+ "mean_ms": total / ITERATIONS,  # mean time of one cross merge
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_cut_bins_to_frame.py b/benchmarks/pandas/bench_cut_bins_to_frame.py
new file mode 100644
index 00000000..5ae5908c
--- /dev/null
+++ b/benchmarks/pandas/bench_cut_bins_to_frame.py
@@ -0,0 +1,56 @@
+"""Benchmark: cut_bins_to_frame — pd.cut with value_counts and bin summary on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # number of values that get binned
+NUM_BINS = 20  # equal-width bins requested from pd.cut
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes
+
+data = np.array([(i % 1000) * 0.1 for i in range(SIZE)])  # values cycle through 0.0 .. 99.9
+
+for _ in range(WARMUP):
+    # pandas equivalent of cutBinsToFrame: cut + value_counts on the categorical result
+    cut_result = pd.cut(data, NUM_BINS)
+    # Categorical.value_counts() takes no `sort` keyword (only `dropna`); passing one raises TypeError
+    counts = cut_result.value_counts()
+    summary = pd.DataFrame({
+        "bin": counts.index.astype(str),
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+        "count": counts.values,
+        "frequency": counts.values / len(data),
+    })
+    # cutBinCounts equivalent: counts dict
+    count_dict = dict(zip(counts.index.astype(str), counts.values))
+    # binEdges equivalent: DataFrame of interval edges (rebuilt on purpose, it is a separate tsb call)
+    edges = pd.DataFrame({
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+    })
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    cut_result = pd.cut(data, NUM_BINS)
+    counts = cut_result.value_counts()  # see warm-up note: no `sort` keyword on Categorical
+    summary = pd.DataFrame({
+        "bin": counts.index.astype(str),
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+        "count": counts.values,
+        "frequency": counts.values / len(data),
+    })
+    count_dict = dict(zip(counts.index.astype(str), counts.values))
+    edges = pd.DataFrame({
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+    })
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+    "function": "cut_bins_to_frame",
+    "mean_ms": total / ITERATIONS,  # mean per pass (cut + counts + summary + dict + edges)
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_transform_named.py b/benchmarks/pandas/bench_dataframe_transform_named.py
new file mode 100644
index 00000000..045650e9
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_transform_named.py
@@ -0,0 +1,40 @@
+"""
+Benchmark: pandas named column-wise operations on a DataFrame.
+
+Mirrors tsb dataFrameTransform with string names like "mean", "cumsum",
+and ["sum", "mean"]; reducers run through agg(), "cumsum" through transform().
+
+Uses 10k-row DataFrame to match the TypeScript benchmark.
+"""
+import json
+import time
+import pandas as pd
+
+ROWS = 10_000  # rows in the benchmark frame
+WARMUP = 3  # untimed passes
+ITERATIONS = 20  # timed passes; each pass runs three named operations
+
+a = [(i % 100) * 1.5 + 1 for i in range(ROWS)]
+b = [((i * 3) % 200) * 0.5 + 2 for i in range(ROWS)]
+c = [((i * 7) % 50) * 2.0 + 0.5 for i in range(ROWS)]
+df = pd.DataFrame({"a": a, "b": b, "c": c})
+
+# Warm-up
+for _ in range(WARMUP):
+    df.agg("mean")  # DataFrame.transform("mean") raises ValueError("Function did not transform")
+    df.transform("cumsum")  # shape-preserving, so transform() accepts it
+    df.agg(["sum", "mean"])  # a list of reducers is likewise rejected by transform()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    df.agg("mean")
+    df.transform("cumsum")
+    df.agg(["sum", "mean"])
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+    "function": "dataframe_transform_named",
+    "mean_ms": total_ms / ITERATIONS,  # mean per pass (all three operations)
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_update.py b/benchmarks/pandas/bench_dataframe_update.py
new file mode 100644
index 00000000..cea97283
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_update.py
@@ -0,0 +1,48 @@
+"""
+Benchmark: DataFrame.update() — in-place-style DataFrame value update.
+
+Mirrors tsb dataFrameUpdate.
+Overwrites non-null values from `other` into `self`.
+Outputs JSON: {"function": "dataframe_update", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+N = 10_000  # rows in both frames
+WARMUP = 20  # untimed copy + update passes
+ITERATIONS = 200  # timed copy + update passes
+
+# Build two DataFrames; `other` has NaN in ~2/3 of rows (so 1/3 rows are updated).
+a_data = [i * 1.0 for i in range(N)]
+b_data = [i * 2.0 for i in range(N)]
+a_other = [i * 10.0 if i % 3 == 0 else np.nan for i in range(N)]  # real value on every third row
+b_other = [i * 20.0 if i % 3 == 0 else np.nan for i in range(N)]
+
+df = pd.DataFrame({"a": a_data, "b": b_data})
+other = pd.DataFrame({"a": a_other, "b": b_other})
+
+# Warm-up: update() mutates its receiver, so every pass works on a fresh copy of `df`
+for _ in range(WARMUP):
+ dc = df.copy()
+ dc.update(other)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ dc = df.copy()  # the copy happens inside the timed region, so its cost is included
+ dc.update(other)
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(
+ json.dumps(
+ {
+ "function": "dataframe_update",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (copy + update)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_filter_series.py b/benchmarks/pandas/bench_filter_series.py
new file mode 100644
index 00000000..ec653243
--- /dev/null
+++ b/benchmarks/pandas/bench_filter_series.py
@@ -0,0 +1,31 @@
+"""Benchmark: Series.filter — filter Series index labels by items/like/regex"""
+import json
+import time
+import pandas as pd
+
+N = 100_000  # entries in the Series
+WARMUP = 3  # untimed passes
+ITERATIONS = 10  # timed passes; each runs an `items` filter and a `like` filter
+
+labels = [f"label_{i}" for i in range(N)]
+values = [i * 0.5 for i in range(N)]
+s = pd.Series(values, index=labels)
+
+keep_items = [f"label_{i * 100}" for i in range(1_000)]  # every 100th label, 1,000 in total
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.filter(items=keep_items)
+ s.filter(like="label_5")  # substring match against the index labels
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.filter(items=keep_items)
+ s.filter(like="label_5")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "filter_series",
+ "mean_ms": total / ITERATIONS,  # mean per pass (both filters)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_get_set_option.py b/benchmarks/pandas/bench_get_set_option.py
new file mode 100644
index 00000000..df9c675e
--- /dev/null
+++ b/benchmarks/pandas/bench_get_set_option.py
@@ -0,0 +1,44 @@
+"""
+Benchmark: get_option / set_option / reset_option — pandas options API.
+
+Mirrors tsb getOption / setOption / resetOption.
+Outputs JSON: {"function": "get_set_option", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+WARMUP = 10  # untimed passes
+ITERATIONS = 10_000  # timed passes; each pass makes six option calls
+
+# Warm-up: get / set / reset on two display options, using fixed values
+for _ in range(WARMUP):
+ pd.get_option("display.max_rows")
+ pd.set_option("display.max_rows", 50)
+ pd.reset_option("display.max_rows")
+ pd.get_option("display.precision")
+ pd.set_option("display.precision", 3)
+ pd.reset_option("display.precision")
+
+start = time.perf_counter()
+for i in range(ITERATIONS):
+ pd.get_option("display.max_rows")
+ pd.set_option("display.max_rows", (i % 90) + 10)  # value cycles through 10..99
+ pd.reset_option("display.max_rows")  # each pass ends with the option reset
+ pd.get_option("display.precision")
+ pd.set_option("display.precision", (i % 8) + 2)  # value cycles through 2..9
+ pd.reset_option("display.precision")
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(
+ json.dumps(
+ {
+ "function": "get_set_option",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (all six calls)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_join_all.py b/benchmarks/pandas/bench_join_all.py
new file mode 100644
index 00000000..040aa028
--- /dev/null
+++ b/benchmarks/pandas/bench_join_all.py
@@ -0,0 +1,30 @@
+"""Benchmark: join_all — sequential left-join of 4 DataFrames each with 5k rows"""
+import json
+import time
+import pandas as pd
+
+N = 5_000  # rows per frame
+WARMUP = 3  # untimed joins
+ITERATIONS = 10  # timed joins
+
+idx = [str(i) for i in range(N)]  # shared string index used by all four frames
+
+base = pd.DataFrame({"a": list(range(N))}, index=idx)
+df1 = pd.DataFrame({"b": [i * 2 for i in range(N)]}, index=idx)
+df2 = pd.DataFrame({"c": [i * 3 for i in range(N)]}, index=idx)
+df3 = pd.DataFrame({"d": [i * 4 for i in range(N)]}, index=idx)
+
+for _ in range(WARMUP):  # warm-up, result discarded
+ base.join([df1, df2, df3])  # a single join() call that receives the list of frames
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ base.join([df1, df2, df3])
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "join_all",
+ "mean_ms": total / ITERATIONS,  # mean time of one join() call
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_math_ops.py b/benchmarks/pandas/bench_math_ops.py
new file mode 100644
index 00000000..1159ec02
--- /dev/null
+++ b/benchmarks/pandas/bench_math_ops.py
@@ -0,0 +1,35 @@
+"""Benchmark: math_ops — abs / round on Series and DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in the Series and the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each runs abs and round on both objects
+
+s = pd.Series(np.where(np.arange(SIZE) % 2 == 0, -(np.arange(SIZE) + 0.567), np.arange(SIZE) + 0.567))  # negative at even positions, positive at odd ones
+df = pd.DataFrame({
+ "a": -(np.arange(SIZE) + 0.123),  # all negative
+ "b": np.arange(SIZE) + 0.456,  # all positive
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.abs()
+ df.abs()
+ s.round(1)
+ df.round(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.abs()
+ df.abs()
+ s.round(1)
+ df.round(1)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "math_ops",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four calls)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_merge_asof.py b/benchmarks/pandas/bench_merge_asof.py
new file mode 100644
index 00000000..5517d2f8
--- /dev/null
+++ b/benchmarks/pandas/bench_merge_asof.py
@@ -0,0 +1,34 @@
+"""Benchmark: merge_asof — backward asof join of two 10k-row sorted DataFrames"""
+import json
+import time
+import pandas as pd
+
+N = 10_000  # rows in each frame
+WARMUP = 3  # untimed merges
+ITERATIONS = 10  # timed merges
+
+# Trades sorted by time: 0, 2, 4, ...
+trade_times = list(range(0, N * 2, 2))
+prices = [100.0 + i * 0.5 for i in range(N)]
+
+# Quotes sorted by time, sparser: 0, 3, 6, ...
+quote_times = list(range(0, N * 3, 3))
+bids = [99.0 + i * 0.5 for i in range(N)]
+
+trades = pd.DataFrame({"time": trade_times, "price": prices})
+quotes = pd.DataFrame({"time": quote_times, "bid": bids})
+
+for _ in range(WARMUP):  # warm-up, result discarded
+ pd.merge_asof(trades, quotes, on="time")  # no `direction` is passed, so the pandas default applies
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.merge_asof(trades, quotes, on="time")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "merge_asof",
+ "mean_ms": total / ITERATIONS,  # mean time of one merge_asof call
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_na_ops.py b/benchmarks/pandas/bench_na_ops.py
new file mode 100644
index 00000000..b7d0adf0
--- /dev/null
+++ b/benchmarks/pandas/bench_na_ops.py
@@ -0,0 +1,42 @@
+"""Benchmark: na_ops — isna / notna / ffill / bfill on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in the Series and the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each makes six NA-related calls
+
+data = pd.array([i if i % 5 != 0 else pd.NA for i in range(SIZE)], dtype="Int64")  # pd.NA at every 5th position
+s = pd.Series(data, dtype="float64")  # the benchmarked Series is float64
+s[np.arange(SIZE) % 5 == 0] = np.nan  # writes NaN at the same positions that held pd.NA above
+
+df = pd.DataFrame({
+ "a": s,
+ "b": pd.Series([float(i * 2) if i % 7 != 0 else np.nan for i in range(SIZE)]),  # NaN at every 7th position
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ pd.isna(s)
+ pd.notna(s)
+ s.ffill()
+ s.bfill()
+ df.ffill()
+ df.bfill()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.isna(s)
+ pd.notna(s)
+ s.ffill()
+ s.bfill()
+ df.ffill()
+ df.bfill()
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "na_ops",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all six calls)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_notna_boolean.py b/benchmarks/pandas/bench_notna_boolean.py
new file mode 100644
index 00000000..96c0a59d
--- /dev/null
+++ b/benchmarks/pandas/bench_notna_boolean.py
@@ -0,0 +1,36 @@
+"""Benchmark: notna_boolean — boolean-mask indexing on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in the Series and the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each performs three boolean-mask selections
+
+s = pd.Series(np.arange(SIZE))
+mask = pd.Series(np.arange(SIZE) % 2 == 0)  # boolean Series, True at even positions
+bool_arr = np.arange(SIZE) % 3 != 0  # plain NumPy boolean array, False at multiples of 3
+
+df = pd.DataFrame({
+ "a": np.arange(SIZE),
+ "b": np.arange(SIZE) * 2,
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s[mask]
+ s[~mask]  # the mask inversion is evaluated inside the loop
+ df[bool_arr]
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s[mask]
+ s[~mask]  # inversion cost is part of the measured time
+ df[bool_arr]
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "notna_boolean",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all three selections)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_numeric_ops_log2_exp.py b/benchmarks/pandas/bench_numeric_ops_log2_exp.py
new file mode 100644
index 00000000..89208443
--- /dev/null
+++ b/benchmarks/pandas/bench_numeric_ops_log2_exp.py
@@ -0,0 +1,52 @@
+"""
+Benchmark: np.log2, np.log10, np.exp, np.sign applied to pandas Series and DataFrame.
+
+Mirrors tsb seriesLog2, seriesLog10, seriesExp, seriesSign and their DataFrame variants.
+Uses 100k-row data to match the TypeScript benchmark.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in the Series and the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 30  # timed passes; each applies four ufuncs to both objects
+
+# Positive values for log2/log10; any values for exp/sign
+data = [(i + 1) * 0.1 for i in range(SIZE)]  # starts at 0.1, so never zero or negative
+s = pd.Series(data, dtype=float)
+df = pd.DataFrame({
+ "a": [(i + 1) * 0.1 for i in range(SIZE)],
+ "b": [(i + 1) * 0.2 for i in range(SIZE)],
+})
+
+# Warm-up: NumPy ufuncs called directly on the pandas objects
+for _ in range(WARMUP):
+ np.log2(s)
+ np.log10(s)
+ np.exp(s)  # NOTE(review): inputs grow to ~10,000, so exp presumably overflows to inf - confirm intended
+ np.sign(s)
+ np.log2(df)
+ np.log10(df)
+ np.exp(df)
+ np.sign(df)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ np.log2(s)
+ np.log10(s)
+ np.exp(s)
+ np.sign(s)
+ np.log2(df)
+ np.log10(df)
+ np.exp(df)
+ np.sign(df)
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "numeric_ops_log2_exp",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (all eight calls)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_pow_mod.py b/benchmarks/pandas/bench_pow_mod.py
new file mode 100644
index 00000000..3458eb26
--- /dev/null
+++ b/benchmarks/pandas/bench_pow_mod.py
@@ -0,0 +1,34 @@
+"""Benchmark: Series.pow, Series.mod, DataFrame.pow on 100k rows"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000  # rows in the Series and the DataFrame
+WARMUP = 3  # untimed passes
+ITERATIONS = 10  # timed passes; each runs Series pow, Series mod and DataFrame pow
+
+data = (np.arange(ROWS) % 100) + 1  # values cycle through 1..100
+s = pd.Series(data.astype(float))
+df = pd.DataFrame({
+ "a": ((np.arange(ROWS) % 100) + 1).astype(float),
+ "b": ((np.arange(ROWS) % 50) + 1).astype(float),  # values cycle through 1..50
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.pow(2)
+ s.mod(7)
+ df.pow(2)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.pow(2)
+ s.mod(7)
+ df.pow(2)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "pow_mod",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all three calls)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_read_html.py b/benchmarks/pandas/bench_read_html.py
new file mode 100644
index 00000000..03dd0199
--- /dev/null
+++ b/benchmarks/pandas/bench_read_html.py
@@ -0,0 +1,52 @@
+"""
+Benchmark: pd.read_html — parse HTML tables into DataFrames.
+Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import math
+
+try:
+ import pandas as pd
+except ImportError:
+ import subprocess, sys
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "--quiet"])
+ import pandas as pd
+
+try:
+ import lxml # noqa: F401
+except ImportError:
+ import subprocess, sys
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "lxml", "--quiet"])
+
+ROWS = 1_000  # body rows in the generated HTML table
+WARMUP = 3  # untimed parses
+ITERATIONS = 20  # timed parses
+
+
+def build_html(rows: int) -> str:  # one <table>: header row id/name/value/score plus `rows` body rows
+    header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>"
+    body_rows = [
+        f"<tr><td>{i}</td><td>item_{i % 100}</td><td>{i * 1.5:.2f}</td><td>{math.sin(i * 0.01):.6f}</td></tr>"
+        for i in range(rows)
+    ]
+    return f"{header}{''.join(body_rows)}</tbody></table>"
+
+
+html = build_html(ROWS)  # the document is built once, outside the timed region
+
+# Warm-up
+for _ in range(WARMUP):
+ pd.read_html(html)  # NOTE(review): recent pandas presumably warns on literal HTML strings (io.StringIO is preferred) - confirm
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ pd.read_html(html)
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "read_html",
+ "mean_ms": total_ms / ITERATIONS,  # mean time of one read_html call
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_reduce_ops.py b/benchmarks/pandas/bench_reduce_ops.py
new file mode 100644
index 00000000..2be36963
--- /dev/null
+++ b/benchmarks/pandas/bench_reduce_ops.py
@@ -0,0 +1,37 @@
+"""Benchmark: reduce_ops — nunique / any / all on Series and DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in each Series and in the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each makes four reduction calls
+
+s = pd.Series(np.arange(SIZE) % 1000)  # values cycle through 0..999
+bool_s = pd.Series(np.arange(SIZE) > 0)  # False only at position 0
+df = pd.DataFrame({
+ "a": np.arange(SIZE) % 500,
+ "b": np.arange(SIZE) % 200,
+ "c": np.arange(SIZE) % 100,
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.nunique()
+ bool_s.any()
+ bool_s.all()
+ df.nunique()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.nunique()
+ bool_s.any()
+ bool_s.all()
+ df.nunique()
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "reduce_ops",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four reductions)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_rename_ops.py b/benchmarks/pandas/bench_rename_ops.py
new file mode 100644
index 00000000..897f520b
--- /dev/null
+++ b/benchmarks/pandas/bench_rename_ops.py
@@ -0,0 +1,36 @@
+"""Benchmark: rename_ops — rename / add_prefix / add_suffix on Series/DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # rows in the Series and the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each makes four renaming calls
+
+s = pd.Series(np.arange(SIZE), index=[f"row_{i}" for i in range(SIZE)])  # string index labels
+df = pd.DataFrame({
+ "col_a": np.arange(SIZE),
+ "col_b": np.arange(SIZE) * 2,
+ "col_c": np.arange(SIZE) * 3,
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.rename(lambda lbl: f"new_{lbl}")  # a callable is passed to rename
+ df.rename(columns={"col_a": "a", "col_b": "b"})  # renames two of the three columns
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rename(lambda lbl: f"new_{lbl}")
+ df.rename(columns={"col_a": "a", "col_b": "b"})
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "rename_ops",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four calls)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_series_compare_pair.py b/benchmarks/pandas/bench_series_compare_pair.py
new file mode 100644
index 00000000..dbbb2043
--- /dev/null
+++ b/benchmarks/pandas/bench_series_compare_pair.py
@@ -0,0 +1,39 @@
+"""
+Benchmark: pandas Series-to-Series comparison operations.
+
+Mirrors tsb seriesNe(a, b), seriesGt(a, b), seriesLe(a, b), seriesEq(a, b).
+The existing compare benchmark tests scalar comparison; this tests Series-to-Series.
+Uses 100k-element Series to match the TypeScript benchmark.
+"""
+import json
+import time
+import pandas as pd
+
+SIZE = 100_000  # elements in each Series
+WARMUP = 5  # untimed passes
+ITERATIONS = 100  # timed passes; each makes four element-wise comparisons
+
+a = pd.Series([(i * 1.7) % 1000 for i in range(SIZE)], dtype=float)
+b = pd.Series([(i * 2.3) % 1000 for i in range(SIZE)], dtype=float)
+
+# Warm-up: both operands are Series (no scalar operand)
+for _ in range(WARMUP):
+ a.ne(b)
+ a.gt(b)
+ a.le(b)
+ a.eq(b)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ a.ne(b)
+ a.gt(b)
+ a.le(b)
+ a.eq(b)
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "series_compare_pair",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (all four comparisons)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_format_table.py b/benchmarks/pandas/bench_series_format_table.py
new file mode 100644
index 00000000..48abadd1
--- /dev/null
+++ b/benchmarks/pandas/bench_series_format_table.py
@@ -0,0 +1,42 @@
+"""
+Benchmark: pandas Series.to_markdown() and Series.to_latex() on a 500-element Series.
+
+Mirrors the tsb seriesToMarkdown and seriesToLaTeX benchmark.
+Exercises table-rendering of both numeric and string series.
+"""
+import json
+import time
+import math
+import pandas as pd
+
+N = 500  # elements in each Series
+WARMUP = 3  # untimed passes
+ITERATIONS = 30  # timed passes; each renders both Series to markdown and LaTeX
+
+num_data = [math.sin(i * 0.05) * 100 for i in range(N)]
+str_data = [None if i % 10 == 0 else f"item_{i}" for i in range(N)]  # None at every 10th position
+
+num_series = pd.Series(num_data)
+str_series = pd.Series(str_data)
+
+# Warm-up
+for _ in range(WARMUP):
+ num_series.to_markdown()  # NOTE(review): presumably needs the optional `tabulate` package - confirm it is installed
+ num_series.to_latex()
+ str_series.to_markdown()
+ str_series.to_latex()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ num_series.to_markdown()
+ num_series.to_latex()
+ str_series.to_markdown()
+ str_series.to_latex()
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "series_format_table",
+ "mean_ms": total_ms / ITERATIONS,  # mean per pass (all four renders)
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_shift_diff.py b/benchmarks/pandas/bench_shift_diff.py
new file mode 100644
index 00000000..878d05c6
--- /dev/null
+++ b/benchmarks/pandas/bench_shift_diff.py
@@ -0,0 +1,28 @@
+"""Benchmark: Series.shift and Series.diff on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000  # elements in the Series
+WARMUP = 3  # untimed passes
+ITERATIONS = 10  # timed passes; each runs one shift and one diff
+
+data = np.arange(ROWS, dtype=float) * 1.5
+s = pd.Series(data)
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.shift(1)
+ s.diff(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.shift(1)
+ s.diff(1)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "shift_diff",
+ "mean_ms": total / ITERATIONS,  # mean per pass (shift + diff)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_sort_ops.py b/benchmarks/pandas/bench_sort_ops.py
new file mode 100644
index 00000000..929558f3
--- /dev/null
+++ b/benchmarks/pandas/bench_sort_ops.py
@@ -0,0 +1,32 @@
+"""Benchmark: Series.sort_values and DataFrame.sort_values on 100k rows"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000  # rows in the Series and the DataFrame
+WARMUP = 3  # untimed passes
+ITERATIONS = 10  # timed passes; each sorts the Series and the DataFrame
+
+data = np.sin(np.arange(ROWS, dtype=float)) * 1000  # deterministic, non-monotonic values
+s = pd.Series(data)
+df = pd.DataFrame({
+ "a": np.sin(np.arange(ROWS, dtype=float)) * 1000,  # same expression as `data`
+ "b": np.cos(np.arange(ROWS, dtype=float)) * 500,
+})
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.sort_values()
+ df.sort_values("a")  # sorts the frame's rows by column "a"
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.sort_values()
+ df.sort_values("a")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "sort_ops",
+ "mean_ms": total / ITERATIONS,  # mean per pass (both sorts)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_str_findall_expand.py b/benchmarks/pandas/bench_str_findall_expand.py
new file mode 100644
index 00000000..54bf92fb
--- /dev/null
+++ b/benchmarks/pandas/bench_str_findall_expand.py
@@ -0,0 +1,36 @@
+"""
+Benchmark: pandas Series.str.extract() with named capture groups on a 5k-element Series.
+
+Mirrors the tsb strFindallExpand benchmark.
+Each string has the form "userN scoreM levelL" and the regex extracts
+named groups: word, num, score, level.
+"""
+import json
+import time
+import pandas as pd
+
+N = 5_000  # elements in the Series
+WARMUP = 3  # untimed extractions
+ITERATIONS = 20  # timed extractions
+
+data = [None if i % 20 == 0 else f"user{i} score{(i * 7) % 100} level{(i % 5) + 1}" for i in range(N)]
+s = pd.Series(data, dtype="object")  # None at every 20th position
+
+# Named capture-group pattern matching the TypeScript version (groups: word, num, score, level)
+pat = r"(?P<word>[a-z]+)(?P<num>\d+)\s+score(?P<score>\d+)\s+level(?P<level>\d+)"
+
+# Warm-up
+for _ in range(WARMUP):
+    s.str.extract(pat)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.str.extract(pat)
+total_ms = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+    "function": "str_findall_expand",
+    "mean_ms": total_ms / ITERATIONS,  # mean time of one str.extract call
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_to_json_denormalize.py b/benchmarks/pandas/bench_to_json_denormalize.py
new file mode 100644
index 00000000..ae51decf
--- /dev/null
+++ b/benchmarks/pandas/bench_to_json_denormalize.py
@@ -0,0 +1,41 @@
+"""Benchmark: to_json_denormalize — json orient variants on 10k-row DataFrame."""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000  # rows in the DataFrame
+WARMUP = 5  # untimed passes
+ITERATIONS = 30  # timed passes; each runs one to_dict and three to_json calls
+
+# DataFrame matching the tsb benchmark (dotted column names stand in for nested fields)
+df = pd.DataFrame({
+ "name": [f"user_{i}" for i in range(ROWS)],
+ "address.city": [f"city_{i % 100}" for i in range(ROWS)],
+ "address.zip": [str(10000 + (i % 9000)) for i in range(ROWS)],
+ "score": np.arange(ROWS) * 0.01,
+})
+
+for _ in range(WARMUP):
+ # stand-in for toJsonDenormalize: only to_dict("records") runs here; no nesting is rebuilt
+ recs = df.to_dict("records")
+ # pandas equivalent of toJsonRecords: orient="records"
+ df.to_json(orient="records")
+ # pandas equivalent of toJsonSplit: orient="split"
+ df.to_json(orient="split")
+ # pandas equivalent of toJsonIndex: orient="index"
+ df.to_json(orient="index")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ recs = df.to_dict("records")  # result is assigned but never read afterwards
+ df.to_json(orient="records")
+ df.to_json(orient="split")
+ df.to_json(orient="index")
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "to_json_denormalize",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four serialisations)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_truncate_df.py b/benchmarks/pandas/bench_truncate_df.py
new file mode 100644
index 00000000..4f8b0c2a
--- /dev/null
+++ b/benchmarks/pandas/bench_truncate_df.py
@@ -0,0 +1,31 @@
+"""Benchmark: DataFrame.truncate — slice rows by before/after on 100k-row DataFrame"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 100_000  # rows in the DataFrame
+WARMUP = 3  # untimed truncations
+ITERATIONS = 10  # timed truncations
+
+index = list(range(N))  # integer labels 0 .. N - 1, in ascending order
+df = pd.DataFrame({
+ "a": np.arange(N, dtype=float),
+ "b": np.arange(N, dtype=float) * 2,
+ "c": np.arange(N, dtype=float) * 3,
+}, index=index)
+
+for _ in range(WARMUP):  # warm-up, result discarded
+ df.truncate(before=10_000, after=90_000)  # bounds are index labels
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.truncate(before=10_000, after=90_000)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "truncate_df",
+ "mean_ms": total / ITERATIONS,  # mean time of one truncate call
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_value_counts_full.py b/benchmarks/pandas/bench_value_counts_full.py
new file mode 100644
index 00000000..284bb8ed
--- /dev/null
+++ b/benchmarks/pandas/bench_value_counts_full.py
@@ -0,0 +1,28 @@
+"""Benchmark: value_counts_full — value_counts(bins=N) on Series of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # elements in the Series
+WARMUP = 5  # untimed passes
+ITERATIONS = 50  # timed passes; each runs value_counts with 10 and with 20 bins
+
+rng = np.random.default_rng(42)  # fixed seed, so the input is the same on every run
+s = pd.Series(rng.random(SIZE) * 100)
+
+for _ in range(WARMUP):  # warm-up, results discarded
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "value_counts_full",
+ "mean_ms": total / ITERATIONS,  # mean per pass (both binned counts)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_window_extended.py b/benchmarks/pandas/bench_window_extended.py
new file mode 100644
index 00000000..ddafc28a
--- /dev/null
+++ b/benchmarks/pandas/bench_window_extended.py
@@ -0,0 +1,32 @@
+"""Benchmark: window_extended — rolling sem/skew/kurt/quantile on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000  # elements in the Series
+WARMUP = 3  # untimed passes
+ITERATIONS = 20  # timed passes; each computes four rolling statistics
+WINDOW = 10  # rolling window length
+
+s = pd.Series(np.sin(np.arange(SIZE) / 100) * 100 + np.arange(SIZE) * 0.001)  # sine wave plus a small linear term
+
+for _ in range(WARMUP):  # warm-up; a new rolling object is created for every call
+ s.rolling(WINDOW).sem()
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(WINDOW).sem()  # the rolling() construction is inside the timed region
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+total = (time.perf_counter() - start) * 1000  # elapsed seconds -> milliseconds
+
+print(json.dumps({
+ "function": "window_extended",
+ "mean_ms": total / ITERATIONS,  # mean per pass (all four statistics)
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_xs_series.py b/benchmarks/pandas/bench_xs_series.py
new file mode 100644
index 00000000..41dab0aa
--- /dev/null
+++ b/benchmarks/pandas/bench_xs_series.py
@@ -0,0 +1,55 @@
+"""
+Benchmark: Series.xs() — cross-section lookup on Series.
+
+Mirrors tsb xsSeries.
+Tests flat-index lookup (returns scalar) and MultiIndex lookup (returns sub-Series).
+Outputs JSON: {"function": "xs_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+N = 1_000  # number of rows in both Series
+WARMUP = 10  # untimed passes run before measurement starts
+ITERATIONS = 5_000  # timed passes; each pass does one flat and one MultiIndex lookup
+
+# Flat-index Series: each key appears once → xs returns a scalar.
+flat_series = pd.Series(
+ [i * 1.5 for i in range(N)],
+ index=[f"k{i}" for i in range(N)],
+ name="flat",
+)
+
+# MultiIndex Series: 10 outer keys × 100 inner keys → xs returns a sub-Series (100 rows).
+outer_keys = [f"g{i // 100}" for i in range(N)]  # "g0" .. "g9", 100 consecutive rows each
+inner_keys = [i % 100 for i in range(N)]  # 0 .. 99, repeating within every outer key
+multi_index = pd.MultiIndex.from_arrays([outer_keys, inner_keys], names=["outer", "inner"])
+multi_series = pd.Series(
+ [i * 2.0 for i in range(N)],
+ index=multi_index,
+ name="multi",
+)
+
+# Warm-up
+for i in range(WARMUP):
+ flat_series.xs(f"k{i % N}")
+ multi_series.xs(f"g{i % 10}")
+
+start = time.perf_counter()
+for i in range(ITERATIONS):  # lookup keys are formatted inside the timed loop, so that cost is measured too
+ flat_series.xs(f"k{i % N}")
+ multi_series.xs(f"g{i % 10}")
+total_ms = (time.perf_counter() - start) * 1000  # perf_counter is in seconds; convert to ms
+
+print(
+ json.dumps(
+ {
+ "function": "xs_series",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/tsb/bench_at_iat.ts b/benchmarks/tsb/bench_at_iat.ts
new file mode 100644
index 00000000..ed33ba07
--- /dev/null
+++ b/benchmarks/tsb/bench_at_iat.ts
@@ -0,0 +1,45 @@
+/**
+ * Benchmark: seriesAt, seriesIat, dataFrameAt, dataFrameIat — fast scalar access
+ * Outputs JSON: {"function": "at_iat", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, seriesAt, seriesIat, dataFrameAt, dataFrameIat } from "../../src/index.ts";
+
+const N = 100_000; // number of rows
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass performs four scalar lookups
+
+const labels = Array.from({ length: N }, (_, i) => `r${i}`);
+const values = Array.from({ length: N }, (_, i) => i * 1.5);
+
+const s = new Series({ data: values, index: labels });
+const df = DataFrame.fromColumns(
+ { a: values, b: values.map((v) => v * 2) },
+ { index: labels },
+);
+const mid = Math.floor(N / 2); // one integer position shared by label and positional lookups, valid even for odd N
+const midLabel = `r${mid}`; // label of that same row
+
+for (let i = 0; i < WARMUP; i++) {
+ seriesAt(s, midLabel);
+ seriesIat(s, mid);
+ dataFrameAt(df, midLabel, "a");
+ dataFrameIat(df, mid, 0);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesAt(s, midLabel);
+ seriesIat(s, mid);
+ dataFrameAt(df, midLabel, "a");
+ dataFrameIat(df, mid, 0);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "at_iat",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_convert_dtypes.ts b/benchmarks/tsb/bench_convert_dtypes.ts
new file mode 100644
index 00000000..2ba7f4d3
--- /dev/null
+++ b/benchmarks/tsb/bench_convert_dtypes.ts
@@ -0,0 +1,53 @@
+/**
+ * Benchmark: convertDtypesSeries and convertDtypesDataFrame
+ *
+ * Mirrors pandas Series.convert_dtypes() and DataFrame.convert_dtypes().
+ * Creates a 50k-row dataset with object-typed numeric, boolean, and string
+ * columns, then measures how fast tsb can infer and convert to best dtypes.
+ */
+import { Series, DataFrame, convertDtypesSeries, convertDtypesDataFrame } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 50_000; // rows per column
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes; each converts two Series and one DataFrame
+
+// Object-dtype series: integers stored as Scalars (no typed array)
+const intData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 17 === 0 ? null : i)); // null at every 17th row
+const floatData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 13 === 0 ? null : i * 1.5)); // null at every 13th row
+const strData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 11 === 0 ? null : `str_${i}`)); // null at every 11th row
+const boolData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 7 === 0 ? null : i % 2 === 0)); // null at every 7th row
+
+const intSeries = new Series({ data: intData });
+const floatSeries = new Series({ data: floatData });
+
+const df = DataFrame.fromColumns({ // the string and boolean columns are exercised only through this frame
+ int_col: intData,
+ float_col: floatData,
+ str_col: strData,
+ bool_col: boolData,
+});
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ convertDtypesSeries(intSeries);
+ convertDtypesSeries(floatSeries);
+ convertDtypesDataFrame(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ convertDtypesSeries(intSeries);
+ convertDtypesSeries(floatSeries);
+ convertDtypesDataFrame(df);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "convert_dtypes",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_cross_join.ts b/benchmarks/tsb/bench_cross_join.ts
new file mode 100644
index 00000000..0bdf02fb
--- /dev/null
+++ b/benchmarks/tsb/bench_cross_join.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: crossJoin — Cartesian product of two 300-row DataFrames (90k result rows).
+ * Outputs JSON: {"function": "cross_join", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, crossJoin } from "../../src/index.ts";
+
+const N = 300; // rows in each input frame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass performs one crossJoin
+
+// Distinct column names so no suffix needed
+const left = DataFrame.fromColumns({
+ id_a: Array.from({ length: N }, (_, i) => i),
+ val_a: Array.from({ length: N }, (_, i) => i * 1.5),
+});
+const right = DataFrame.fromColumns({
+ id_b: Array.from({ length: N }, (_, i) => i),
+ val_b: Array.from({ length: N }, (_, i) => i * 2.5),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ crossJoin(left, right);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ crossJoin(left, right);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "cross_join",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_cut_bins_to_frame.ts b/benchmarks/tsb/bench_cut_bins_to_frame.ts
new file mode 100644
index 00000000..135fcd91
--- /dev/null
+++ b/benchmarks/tsb/bench_cut_bins_to_frame.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: cut_bins_to_frame — cutBinsToFrame / cutBinCounts / binEdges on 100k data points.
+ * Outputs JSON: {"function": "cut_bins_to_frame", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { cut, cutBinsToFrame, cutBinCounts, binEdges } from "../../src/index.ts";
+
+const SIZE = 100_000; // number of data points
+const NUM_BINS = 20; // bin count passed to cut
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs the three post-cut helpers
+
+const data = Array.from({ length: SIZE }, (_, i) => (i % 1000) * 0.1); // repeats a 1000-step ramp
+const binResult = cut(data, NUM_BINS); // binning runs once here, outside the timed loop
+
+for (let i = 0; i < WARMUP; i++) {
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "cut_bins_to_frame",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_transform_named.ts b/benchmarks/tsb/bench_dataframe_transform_named.ts
new file mode 100644
index 00000000..d45ab0f1
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_transform_named.ts
@@ -0,0 +1,43 @@
+/**
+ * Benchmark: dataFrameTransform with named aggregation strings.
+ *
+ * Exercises the string-name form of dataFrameTransform (stats/transform_agg.ts):
+ * two single-name calls ("mean", "cumsum") and one array call (["sum", "mean"]).
+ * Intended to mirror pandas DataFrame.transform with function names.
+ *
+ * Outputs JSON: {"function": "dataframe_transform_named", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, dataFrameTransform } from "../../src/index.ts";
+
+const ROWS = 10_000; // rows per column
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes; each pass runs three transforms
+
+const a = Array.from({ length: ROWS }, (_, i) => (i % 100) * 1.5 + 1);
+const b = Array.from({ length: ROWS }, (_, i) => ((i * 3) % 200) * 0.5 + 2);
+const c = Array.from({ length: ROWS }, (_, i) => ((i * 7) % 50) * 2.0 + 0.5);
+const df = DataFrame.fromColumns({ a, b, c });
+
+// Warm-up: single-string transform and array-of-strings transform
+for (let i = 0; i < WARMUP; i++) {
+ dataFrameTransform(df, "mean");
+ dataFrameTransform(df, "cumsum");
+ dataFrameTransform(df, ["sum", "mean"] as const);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ dataFrameTransform(df, "mean");
+ dataFrameTransform(df, "cumsum");
+ dataFrameTransform(df, ["sum", "mean"] as const);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_transform_named",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_update.ts b/benchmarks/tsb/bench_dataframe_update.ts
new file mode 100644
index 00000000..eaacbe9d
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_update.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: dataFrameUpdate — in-place-style DataFrame value update.
+ *
+ * Mirrors pandas `DataFrame.update()`.
+ * Overwrites non-null values from `other` into `self`.
+ * Outputs JSON: {"function": "dataframe_update", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, dataFrameUpdate } from "../../src/index.ts"; // NOTE(review): Series is not referenced in this script
+
+const N = 10_000; // rows per column
+const WARMUP = 20; // untimed passes
+const ITERATIONS = 200; // timed passes; each pass performs one dataFrameUpdate
+
+// Build two DataFrames; `other` has null in ~2/3 of rows (so 1/3 rows are updated).
+const aData = Array.from({ length: N }, (_, i) => i * 1.0);
+const bData = Array.from({ length: N }, (_, i) => i * 2.0);
+
+const aOther = Array.from({ length: N }, (_, i) =>
+ i % 3 === 0 ? i * 10.0 : (null as unknown as number), // cast keeps the array typed as number[] while still holding nulls
+);
+const bOther = Array.from({ length: N }, (_, i) =>
+ i % 3 === 0 ? i * 20.0 : (null as unknown as number), // same cast as above
+);
+
+const df = new DataFrame({ a: aData, b: bData });
+const other = new DataFrame({ a: aOther, b: bOther });
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ dataFrameUpdate(df, other); // result is discarded; NOTE(review): whether df itself is mutated is not visible here
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ dataFrameUpdate(df, other);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "dataframe_update",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_filter_series.ts b/benchmarks/tsb/bench_filter_series.ts
new file mode 100644
index 00000000..d1bdef87
--- /dev/null
+++ b/benchmarks/tsb/bench_filter_series.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: filterSeries — filter Series index labels by `items` and by `like` (no regex case is exercised)
+ * Outputs JSON: {"function": "filter_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, filterSeries } from "../../src/index.ts";
+
+const N = 100_000; // number of labelled rows
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass runs one `items` and one `like` filter
+
+// Series with string labels: "label_0", "label_1", ..., "label_N-1"
+const labels = Array.from({ length: N }, (_, i) => `label_${i}`);
+const values = Array.from({ length: N }, (_, i) => i * 0.5);
+const s = new Series({ data: values, index: labels });
+
+// Pre-build an array of 1000 labels to keep (every 100th label)
+const keepItems = Array.from({ length: 1_000 }, (_, i) => `label_${i * 100}`);
+
+for (let i = 0; i < WARMUP; i++) {
+ filterSeries(s, { items: keepItems });
+ filterSeries(s, { like: "label_5" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ filterSeries(s, { items: keepItems });
+ filterSeries(s, { like: "label_5" });
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "filter_series",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_get_set_option.ts b/benchmarks/tsb/bench_get_set_option.ts
new file mode 100644
index 00000000..c9c4d07a
--- /dev/null
+++ b/benchmarks/tsb/bench_get_set_option.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: getOption / setOption / resetOption — pandas options API.
+ *
+ * Mirrors pandas `pd.get_option`, `pd.set_option`, `pd.reset_option`.
+ * Outputs JSON: {"function": "get_set_option", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { getOption, setOption, resetOption } from "../../src/index.ts";
+
+const WARMUP = 10; // untimed passes
+const ITERATIONS = 10_000; // timed passes; each pass does get/set/reset on two option keys
+
+// Warm-up (uses fixed values; the timed loop below varies them per iteration)
+for (let i = 0; i < WARMUP; i++) {
+ getOption("display.max_rows");
+ setOption("display.max_rows", 50);
+ resetOption("display.max_rows");
+ getOption("display.precision");
+ setOption("display.precision", 3);
+ resetOption("display.precision");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ getOption("display.max_rows");
+ setOption("display.max_rows", (i % 90) + 10); // cycles through 10..99
+ resetOption("display.max_rows");
+ getOption("display.precision");
+ setOption("display.precision", (i % 8) + 2); // cycles through 2..9
+ resetOption("display.precision");
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "get_set_option",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_join_all.ts b/benchmarks/tsb/bench_join_all.ts
new file mode 100644
index 00000000..2dfb3358
--- /dev/null
+++ b/benchmarks/tsb/bench_join_all.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: joinAll — sequential left-join of 4 DataFrames each with 5k rows.
+ * Outputs JSON: {"function": "join_all", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, joinAll } from "../../src/index.ts";
+
+const N = 5_000; // rows in every frame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass joins three frames onto the base
+
+const idx = Array.from({ length: N }, (_, i) => String(i)); // string labels "0" .. "N-1", shared by all frames
+
+// Base DataFrame and three others — distinct column names, shared index
+const base = DataFrame.fromColumns({ a: Array.from({ length: N }, (_, i) => i) }, { index: idx });
+const df1 = DataFrame.fromColumns({ b: Array.from({ length: N }, (_, i) => i * 2) }, { index: idx });
+const df2 = DataFrame.fromColumns({ c: Array.from({ length: N }, (_, i) => i * 3) }, { index: idx });
+const df3 = DataFrame.fromColumns({ d: Array.from({ length: N }, (_, i) => i * 4) }, { index: idx });
+
+for (let i = 0; i < WARMUP; i++) {
+ joinAll(base, [df1, df2, df3]); // no options passed; NOTE(review): "left" in the header relies on joinAll's default — confirm
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ joinAll(base, [df1, df2, df3]);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "join_all",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_math_ops.ts b/benchmarks/tsb/bench_math_ops.ts
new file mode 100644
index 00000000..5559bde5
--- /dev/null
+++ b/benchmarks/tsb/bench_math_ops.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: math_ops — absSeries / absDataFrame / roundSeries / roundDataFrame on 100k rows.
+ * Outputs JSON: {"function": "math_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs two abs and two round calls
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i % 2 === 0 ? -(i + 0.567) : i + 0.567)) }); // sign alternates per row
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => -(i + 0.123)), // all negative
+ b: Array.from({ length: SIZE }, (_, i) => i + 0.456), // all positive
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1);
+ roundDataFrame(df, 1);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1);
+ roundDataFrame(df, 1);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "math_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_merge_asof.ts b/benchmarks/tsb/bench_merge_asof.ts
new file mode 100644
index 00000000..9ef2a2b8
--- /dev/null
+++ b/benchmarks/tsb/bench_merge_asof.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: mergeAsof — backward asof join of two 10k-row sorted DataFrames.
+ * Outputs JSON: {"function": "merge_asof", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, mergeAsof } from "../../src/index.ts";
+
+const N = 10_000; // rows in each frame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass performs one mergeAsof
+
+// Trades sorted by time: 0, 2, 4, ...
+const tradeTimes = Array.from({ length: N }, (_, i) => i * 2);
+const prices = Array.from({ length: N }, (_, i) => 100.0 + i * 0.5);
+
+// Quotes sorted by time, sparser: 0, 3, 6, ...
+const quoteTimes = Array.from({ length: N }, (_, i) => i * 3);
+const bids = Array.from({ length: N }, (_, i) => 99.0 + i * 0.5);
+
+const trades = DataFrame.fromColumns({ time: tradeTimes, price: prices });
+const quotes = DataFrame.fromColumns({ time: quoteTimes, bid: bids });
+
+for (let i = 0; i < WARMUP; i++) {
+ mergeAsof(trades, quotes, { on: "time" }); // only `on` is passed; NOTE(review): "backward" relies on the default direction — confirm
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ mergeAsof(trades, quotes, { on: "time" });
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "merge_asof",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_na_ops.ts b/benchmarks/tsb/bench_na_ops.ts
new file mode 100644
index 00000000..31990d0c
--- /dev/null
+++ b/benchmarks/tsb/bench_na_ops.ts
@@ -0,0 +1,47 @@
+/**
+ * Benchmark: na_ops — isna / notna / ffillSeries / bfillSeries / dataFrameFfill / dataFrameBfill on 100k rows.
+ * Outputs JSON: {"function": "na_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, isna, notna, ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs all six operations
+
+const data: (number | null)[] = Array.from({ length: SIZE }, (_, i) =>
+ i % 5 === 0 ? null : i, // null at every 5th row
+);
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+ a: data,
+ b: Array.from({ length: SIZE }, (_, i) => (i % 7 === 0 ? null : i * 2)), // null at every 7th row
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "na_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_notna_boolean.ts b/benchmarks/tsb/bench_notna_boolean.ts
new file mode 100644
index 00000000..ecd113db
--- /dev/null
+++ b/benchmarks/tsb/bench_notna_boolean.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: notna_boolean — keepTrue / keepFalse / filterBy on 100k rows.
+ * Outputs JSON: {"function": "notna_boolean", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, keepTrue, keepFalse, filterBy } from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs keepTrue, keepFalse and filterBy once
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i) });
+const mask = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 2 === 0) }); // true on even rows
+const boolArr = Array.from({ length: SIZE }, (_, i) => i % 3 !== 0); // plain boolean array, false on every 3rd row
+
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => i),
+ b: Array.from({ length: SIZE }, (_, i) => i * 2),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "notna_boolean",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_numeric_ops_log2_exp.ts b/benchmarks/tsb/bench_numeric_ops_log2_exp.ts
new file mode 100644
index 00000000..b717b219
--- /dev/null
+++ b/benchmarks/tsb/bench_numeric_ops_log2_exp.ts
@@ -0,0 +1,66 @@
+/**
+ * Benchmark: seriesLog2 / seriesLog10 / seriesExp / seriesSign and DataFrame variants.
+ *
+ * Mirrors numpy/pandas element-wise math functions on 100k-row data:
+ * - np.log2(s), np.log10(s), np.exp(s), np.sign(s)
+ * - DataFrame.apply(np.log2), etc.
+ *
+ * Outputs JSON: {"function": "numeric_ops_log2_exp", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ Series,
+ DataFrame,
+ seriesLog2,
+ seriesLog10,
+ seriesExp,
+ seriesSign,
+ dataFrameLog2,
+ dataFrameLog10,
+ dataFrameExp,
+ dataFrameSign,
+} from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 30; // timed passes; each pass runs four Series and four DataFrame operations
+
+// Positive values for log2/log10; any values for exp/sign
+const data = Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.1); // starts at 0.1, so never zero or negative
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.1),
+ b: Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.2),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ seriesLog2(s);
+ seriesLog10(s);
+ seriesExp(s);
+ seriesSign(s);
+ dataFrameLog2(df);
+ dataFrameLog10(df);
+ dataFrameExp(df);
+ dataFrameSign(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesLog2(s);
+ seriesLog10(s);
+ seriesExp(s);
+ seriesSign(s);
+ dataFrameLog2(df);
+ dataFrameLog10(df);
+ dataFrameExp(df);
+ dataFrameSign(df);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "numeric_ops_log2_exp",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_pow_mod.ts b/benchmarks/tsb/bench_pow_mod.ts
new file mode 100644
index 00000000..1873099c
--- /dev/null
+++ b/benchmarks/tsb/bench_pow_mod.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: seriesPow, seriesMod, dataFramePow on 100k rows
+ */
+import { Series, DataFrame, seriesPow, seriesMod, dataFramePow } from "../../src/index.ts";
+
+const ROWS = 100_000; // rows per Series / column
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass runs seriesPow, seriesMod and dataFramePow once
+
+const data = Array.from({ length: ROWS }, (_, i) => (i % 100) + 1); // integers cycling through 1..100
+const s = new Series({ data });
+
+const dfData = {
+ a: Array.from({ length: ROWS }, (_, i) => (i % 100) + 1), // cycles through 1..100
+ b: Array.from({ length: ROWS }, (_, i) => (i % 50) + 1), // cycles through 1..50
+};
+const df = new DataFrame(dfData);
+
+for (let i = 0; i < WARMUP; i++) {
+ seriesPow(s, 2);
+ seriesMod(s, 7);
+ dataFramePow(df, 2);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesPow(s, 2);
+ seriesMod(s, 7);
+ dataFramePow(df, 2);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "pow_mod",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_read_html.ts b/benchmarks/tsb/bench_read_html.ts
new file mode 100644
index 00000000..3cbc7149
--- /dev/null
+++ b/benchmarks/tsb/bench_read_html.ts
@@ -0,0 +1,43 @@
+/**
+ * Benchmark: readHtml — parse HTML tables into DataFrames.
+ * Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { readHtml } from "../../src/index.js";
+
+const ROWS = 1_000; // body rows in the generated table
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes; each pass parses the same HTML string once
+
+// Build one <table> with a 4-column header (id, name, value, score) and `rows` body rows.
+function buildHtml(rows: number): string {
+ const header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>";
+ const bodyRows: string[] = [];
+ for (let i = 0; i < rows; i++) {
+ bodyRows.push(
+ `<tr><td>${i}</td><td>item_${i % 100}</td><td>${(i * 1.5).toFixed(2)}</td><td>${Math.sin(i * 0.01).toFixed(6)}</td></tr>`,
+ );
+ }
+ return `${header}${bodyRows.join("")}</tbody></table>`; // close the tags opened in `header`
+}
+
+const html = buildHtml(ROWS); // built once, outside the timed loop
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ readHtml(html);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ readHtml(html);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "read_html",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_reduce_ops.ts b/benchmarks/tsb/bench_reduce_ops.ts
new file mode 100644
index 00000000..f2e524f7
--- /dev/null
+++ b/benchmarks/tsb/bench_reduce_ops.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: reduce_ops — nuniqueSeries / anySeries / allSeries / nunique(df) on 100k rows.
+ * Outputs JSON: {"function": "reduce_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, nuniqueSeries, anySeries, allSeries, nunique } from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs the four reductions once
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 1000) }); // 1000 distinct values
+const boolSeries = new Series({ data: Array.from({ length: SIZE }, (_, i) => i > 0) }); // false only at row 0; NOTE(review): any/all may exit early if they short-circuit
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => i % 500), // 500 distinct values
+ b: Array.from({ length: SIZE }, (_, i) => i % 200), // 200 distinct values
+ c: Array.from({ length: SIZE }, (_, i) => i % 100), // 100 distinct values
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ nuniqueSeries(s);
+ anySeries(boolSeries);
+ allSeries(boolSeries);
+ nunique(df);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ nuniqueSeries(s);
+ anySeries(boolSeries);
+ allSeries(boolSeries);
+ nunique(df);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "reduce_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_rename_ops.ts b/benchmarks/tsb/bench_rename_ops.ts
new file mode 100644
index 00000000..9277e6e6
--- /dev/null
+++ b/benchmarks/tsb/bench_rename_ops.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: rename_ops — renameSeriesIndex / renameDataFrame / addPrefixDataFrame / addSuffixDataFrame on 100k rows.
+ * Outputs JSON: {"function": "rename_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, renameSeriesIndex, renameDataFrame, addPrefixDataFrame, addSuffixDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000; // rows per Series / column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes; each pass runs the four rename operations once
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i), index: Array.from({ length: SIZE }, (_, i) => `row_${i}`) }); // string labels "row_0" .. "row_N-1"
+const df = DataFrame.fromColumns({
+ col_a: Array.from({ length: SIZE }, (_, i) => i),
+ col_b: Array.from({ length: SIZE }, (_, i) => i * 2),
+ col_c: Array.from({ length: SIZE }, (_, i) => i * 3),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ renameSeriesIndex(s, (lbl) => `new_${String(lbl)}`); // mapper callback is invoked by renameSeriesIndex
+ renameDataFrame(df, { columns: { col_a: "a", col_b: "b" } }); // col_c is not in the mapping
+ addPrefixDataFrame(df, "pre_");
+ addSuffixDataFrame(df, "_suf");
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ renameSeriesIndex(s, (lbl) => `new_${String(lbl)}`);
+ renameDataFrame(df, { columns: { col_a: "a", col_b: "b" } });
+ addPrefixDataFrame(df, "pre_");
+ addSuffixDataFrame(df, "_suf");
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "rename_ops",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_compare_pair.ts b/benchmarks/tsb/bench_series_compare_pair.ts
new file mode 100644
index 00000000..ddf56659
--- /dev/null
+++ b/benchmarks/tsb/bench_series_compare_pair.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: Series-to-Series comparison operations (seriesNe, seriesGt, seriesLe, seriesEq).
+ *
+ * The existing `compare` benchmark only tests scalar comparison (s.eq(500)).
+ * This benchmark tests element-wise comparison between two Series of 100k elements,
+ * mirroring pandas s1.ne(s2), s1.gt(s2), s1.le(s2), s1.eq(s2).
+ *
+ * Outputs JSON: {"function": "series_compare_pair", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, seriesNe, seriesGt, seriesLe, seriesEq } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements per Series
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 100; // timed passes; each pass runs the four comparisons once
+
+const a = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i * 1.7) % 1000) });
+const b = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i * 2.3) % 1000) });
+
+for (let i = 0; i < WARMUP; i++) {
+ seriesNe(a, b);
+ seriesGt(a, b);
+ seriesLe(a, b);
+ seriesEq(a, b);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesNe(a, b);
+ seriesGt(a, b);
+ seriesLe(a, b);
+ seriesEq(a, b);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "series_compare_pair",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_series_format_table.ts b/benchmarks/tsb/bench_series_format_table.ts
new file mode 100644
index 00000000..11683ffc
--- /dev/null
+++ b/benchmarks/tsb/bench_series_format_table.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark: seriesToMarkdown and seriesToLaTeX on a 500-element Series.
+ *
+ * Mirrors pandas Series.to_markdown() and Series.to_latex().
+ * Exercises table-rendering of a numeric series and a string series containing nulls.
+ */
+import { Series, seriesToMarkdown, seriesToLaTeX } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 500; // elements per Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 30; // timed passes; each pass renders both Series in both formats
+
+const numData: number[] = Array.from({ length: N }, (_, i) => Math.sin(i * 0.05) * 100);
+const strData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 10 === 0 ? null : `item_${i}`)); // null at every 10th row
+
+const numSeries = new Series({ data: numData });
+const strSeries = new Series({ data: strData });
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ seriesToMarkdown(numSeries);
+ seriesToLaTeX(numSeries);
+ seriesToMarkdown(strSeries);
+ seriesToLaTeX(strSeries);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesToMarkdown(numSeries);
+ seriesToLaTeX(numSeries);
+ seriesToMarkdown(strSeries);
+ seriesToLaTeX(strSeries);
+}
+const total_ms = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "series_format_table",
+ mean_ms: total_ms / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_shift_diff.ts b/benchmarks/tsb/bench_shift_diff.ts
new file mode 100644
index 00000000..49a8ae4a
--- /dev/null
+++ b/benchmarks/tsb/bench_shift_diff.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: shiftSeries and diffSeries on 100k-element Series
+ */
+import { Series, shiftSeries, diffSeries } from "../../src/index.ts";
+
+const ROWS = 100_000; // elements in the Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes; each pass runs one shift and one diff
+
+const data = Array.from({ length: ROWS }, (_, i) => i * 1.5);
+const s = new Series({ data });
+
+for (let i = 0; i < WARMUP; i++) {
+ shiftSeries(s, 1);
+ diffSeries(s, 1);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+ shiftSeries(s, 1);
+ diffSeries(s, 1);
+}
+const total = performance.now() - start; // performance.now() is already in milliseconds
+
+console.log(
+ JSON.stringify({
+ function: "shift_diff",
+ mean_ms: total / ITERATIONS,
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_sort_ops.ts b/benchmarks/tsb/bench_sort_ops.ts
new file mode 100644
index 00000000..684f1b6e
--- /dev/null
+++ b/benchmarks/tsb/bench_sort_ops.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: sortValuesSeries and sortValuesDataFrame on 100k rows
+ */
+import { Series, DataFrame, sortValuesSeries, sortValuesDataFrame } from "../../src/index.ts";
+
+const ROWS = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Series input: ROWS unsorted values of sin(i) * 1000.
+const seriesValues = Array.from({ length: ROWS }, (_, idx) => Math.sin(idx) * 1000);
+const series = new Series({ data: seriesValues });
+// DataFrame input: two numeric columns; the frame is sorted by column "a" below.
+const columns = {
+  a: Array.from({ length: ROWS }, (_, idx) => Math.sin(idx) * 1000),
+  b: Array.from({ length: ROWS }, (_, idx) => Math.cos(idx) * 500),
+};
+const frame = new DataFrame(columns);
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  sortValuesSeries(series);
+  sortValuesDataFrame(frame, "a");
+}
+
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  sortValuesSeries(series);
+  sortValuesDataFrame(frame, "a");
+}
+const elapsedMs = performance.now() - startedAt;
+
+const report = {
+  function: "sort_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_str_findall_expand.ts b/benchmarks/tsb/bench_str_findall_expand.ts
new file mode 100644
index 00000000..4b4e5deb
--- /dev/null
+++ b/benchmarks/tsb/bench_str_findall_expand.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: strFindallExpand on a 5k-element string Series.
+ *
+ * Mirrors pandas Series.str.extract() with named capture groups.
+ * Each non-null string has the form "user42 score94 level3", so the regex
+ * captures four groups: the leading word, its number, the score, and the level.
+ */
+import { Series, strFindallExpand } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 5_000;
+const WARMUP = 3;
+const ITERATIONS = 20;
+// Input: every 20th entry is null; all other entries look like "user42 score94 level3".
+const data: Scalar[] = Array.from(
+  { length: N },
+  (_, i) => (i % 20 === 0 ? null : `user${i} score${(i * 7) % 100} level${(i % 5) + 1}`),
+);
+const s = new Series({ data });
+
+// Named groups word, number, score, level; each needs its ?<name> prefix, since a bare "(?[" is an invalid regex.
+const pat = /(?<word>[a-z]+)(?<number>\d+)\s+score(?<score>\d+)\s+level(?<level>\d+)/;
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+  strFindallExpand(s, pat);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  strFindallExpand(s, pat);
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "str_findall_expand",
+    mean_ms: total_ms / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms,
+  }),
+);
diff --git a/benchmarks/tsb/bench_to_json_denormalize.ts b/benchmarks/tsb/bench_to_json_denormalize.ts
new file mode 100644
index 00000000..07a42f5f
--- /dev/null
+++ b/benchmarks/tsb/bench_to_json_denormalize.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: to_json_denormalize — toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
+ * Outputs JSON: {"function": "to_json_denormalize", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "../../src/index.ts";
+
+const ROWS = 10_000;
+const WARMUP = 5;
+const ITERATIONS = 30;
+
+// Frame with dotted column names ("address.city", "address.zip") plus a Float64Array "score" column.
+const frame = DataFrame.fromColumns({
+  "name": Array.from({ length: ROWS }, (_, row) => `user_${row}`),
+  "address.city": Array.from({ length: ROWS }, (_, row) => `city_${row % 100}`),
+  "address.zip": Array.from({ length: ROWS }, (_, row) => `${10000 + (row % 9000)}`),
+  "score": Float64Array.from({ length: ROWS }, (_, row) => row * 0.01),
+});
+
+// Untimed warm-up over the same four serializers that are measured below.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  toJsonDenormalize(frame);
+  toJsonRecords(frame);
+  toJsonSplit(frame);
+  toJsonIndex(frame);
+}
+
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  toJsonDenormalize(frame);
+  toJsonRecords(frame);
+  toJsonSplit(frame);
+  toJsonIndex(frame);
+}
+const elapsedMs = performance.now() - startedAt;
+
+const report = {
+  function: "to_json_denormalize",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_truncate_df.ts b/benchmarks/tsb/bench_truncate_df.ts
new file mode 100644
index 00000000..f2661ce0
--- /dev/null
+++ b/benchmarks/tsb/bench_truncate_df.ts
@@ -0,0 +1,35 @@
+/**
+ * Benchmark: truncateDataFrame — slice rows by before/after labels on 100k-row DataFrame
+ * Outputs JSON: {"function": "truncate_df", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, truncateDataFrame } from "../../src/index.ts";
+
+const N = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Integer row labels 0..N-1 and three numeric columns (the label times 1, 2, and 3).
+const rowLabels = Array.from({ length: N }, (_, k) => k);
+const colA = Array.from({ length: N }, (_, k) => k * 1.0);
+const colB = Array.from({ length: N }, (_, k) => k * 2.0);
+const colC = Array.from({ length: N }, (_, k) => k * 3.0);
+const frame = DataFrame.fromColumns({ a: colA, b: colB, c: colC }, { index: rowLabels });
+
+// Untimed warm-up using the same bounds (10_000, 90_000) as the timed loop.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  truncateDataFrame(frame, 10_000, 90_000);
+}
+
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  truncateDataFrame(frame, 10_000, 90_000);
+}
+const elapsedMs = performance.now() - startedAt;
+
+const report = {
+  function: "truncate_df",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_value_counts_full.ts b/benchmarks/tsb/bench_value_counts_full.ts
new file mode 100644
index 00000000..d55b5b72
--- /dev/null
+++ b/benchmarks/tsb/bench_value_counts_full.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: value_counts_full — valueCountsBinned on 100k rows.
+ * Outputs JSON: {"function": "value_counts_full", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, valueCountsBinned } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+// Input: SIZE values of Math.random() * 100, so the data differs on every run.
+const randomValues = Array.from({ length: SIZE }, () => Math.random() * 100);
+const series = new Series({ data: randomValues });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  valueCountsBinned(series, { bins: 10 });
+  valueCountsBinned(series, { bins: 20 });
+}
+
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  valueCountsBinned(series, { bins: 10 });
+  valueCountsBinned(series, { bins: 20 });
+}
+const elapsedMs = performance.now() - startedAt;
+const report = {
+  function: "value_counts_full",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_window_extended.ts b/benchmarks/tsb/bench_window_extended.ts
new file mode 100644
index 00000000..a4b933cb
--- /dev/null
+++ b/benchmarks/tsb/bench_window_extended.ts
@@ -0,0 +1,37 @@
+/**
+ * Benchmark: window_extended — rollingSem / rollingSkew / rollingKurt / rollingQuantile on 100k rows.
+ * Outputs JSON: {"function": "window_extended", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 20;
+const WINDOW = 10;
+
+// Input: a sine wave (sin(i / 100) * 100) plus a small linear term (i * 0.001).
+const signal = Array.from({ length: SIZE }, (_, idx) => Math.sin(idx / 100) * 100 + idx * 0.001);
+const series = new Series({ data: signal });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  rollingSem(series, WINDOW);
+  rollingSkew(series, WINDOW);
+  rollingKurt(series, WINDOW);
+  rollingQuantile(series, WINDOW, 0.5);
+}
+
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  rollingSem(series, WINDOW);
+  rollingSkew(series, WINDOW);
+  rollingKurt(series, WINDOW);
+  rollingQuantile(series, WINDOW, 0.5);
+}
+const elapsedMs = performance.now() - startedAt;
+const report = {
+  function: "window_extended",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_xs_series.ts b/benchmarks/tsb/bench_xs_series.ts
new file mode 100644
index 00000000..cb630e72
--- /dev/null
+++ b/benchmarks/tsb/bench_xs_series.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: xsSeries — cross-section lookup on Series.
+ *
+ * Mirrors pandas `Series.xs()`.
+ * Tests flat-index lookup (returns scalar) and MultiIndex lookup (returns sub-Series).
+ * Outputs JSON: {"function": "xs_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, MultiIndex, xsSeries } from "../../src/index.ts";
+
+const N = 1_000;
+const WARMUP = 10;
+const ITERATIONS = 5_000;
+
+// Flat-index Series: N unique string labels "k0".."k999", so every lookup key matches one row.
+const flatValues = Array.from({ length: N }, (_, idx) => idx * 1.5);
+const flatLabels = Array.from({ length: N }, (_, idx) => `k${idx}`);
+const flatSeries = new Series({ data: flatValues, index: flatLabels, name: "flat" });
+
+// MultiIndex Series: outer labels "g0".."g9" (100 rows each) paired with inner labels 0..99.
+const outerLabels = Array.from({ length: N }, (_, idx) => `g${Math.floor(idx / 100)}`);
+const innerLabels = Array.from({ length: N }, (_, idx) => idx % 100);
+const multiIndex = MultiIndex.fromArrays([outerLabels, innerLabels], { names: ["outer", "inner"] });
+const multiValues = Array.from({ length: N }, (_, idx) => idx * 2.0);
+const multiSeries = new Series({ data: multiValues, index: multiIndex, name: "multi" });
+
+// Untimed warm-up; the lookup keys rotate with the loop counter, as in the timed loop.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  xsSeries(flatSeries, `k${pass % N}`);
+  xsSeries(multiSeries, `g${pass % 10}`);
+}
+
+// Timed run: one flat-index lookup and one outer-level MultiIndex lookup per iteration.
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  xsSeries(flatSeries, `k${pass % N}`);
+  xsSeries(multiSeries, `g${pass % 10}`);
+}
+const elapsedMs = performance.now() - startedAt;
+
+const report = {
+  function: "xs_series",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));