diff --git a/benchmarks/pandas/bench_concat_many_frames.py b/benchmarks/pandas/bench_concat_many_frames.py
new file mode 100644
index 00000000..ddd78796
--- /dev/null
+++ b/benchmarks/pandas/bench_concat_many_frames.py
@@ -0,0 +1,35 @@
+"""Benchmark: pd.concat() with 20 DataFrames — many-frame concatenation on 100k total rows."""
+import json
+import time
+import pandas as pd
+
+N_FRAMES = 20
+ROWS_EACH = 5_000
+WARMUP = 5
+ITERATIONS = 20
+
+frames = [
+    pd.DataFrame({
+        "a": [float(f * ROWS_EACH + i) for i in range(ROWS_EACH)],
+        "b": [(f * ROWS_EACH + i) % 100 for i in range(ROWS_EACH)],
+        "c": [f"cat_{i % 20}" for i in range(ROWS_EACH)],
+    })
+    for f in range(N_FRAMES)
+]
+
+for _ in range(WARMUP):
+    pd.concat(frames)
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    pd.concat(frames)
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "concat_many_frames",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_from_records.py b/benchmarks/pandas/bench_dataframe_from_records.py
new file mode 100644
index 00000000..d12c7446
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_from_records.py
@@ -0,0 +1,30 @@
+"""Benchmark: DataFrame.from_records() — construct a DataFrame from a list of dicts."""
+import json
+import time
+import pandas as pd
+
+ROWS = 20_000
+WARMUP = 5
+ITERATIONS = 20
+
+records = [
+    {"id": i, "value": i * 1.5, "category": f"cat_{i % 50}", "score": None if i % 2 == 0 else i * 0.1, "rank": i % 100}
+    for i in range(ROWS)
+]
+
+for _ in range(WARMUP):
+    pd.DataFrame.from_records(records)
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    pd.DataFrame.from_records(records)
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "dataframe_from_records",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_items.py b/benchmarks/pandas/bench_dataframe_items.py
new file mode 100644
index 00000000..77d3f20a
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_items.py
@@ -0,0 +1,42 @@
+"""Benchmark: DataFrame.items() / iteritems() — iterate over (columnName, Series) pairs."""
+import json
+import time
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 5
+ITERATIONS = 50
+
+df = pd.DataFrame({
+    "a": [float(i) for i in range(ROWS)],
+    "b": [i % 500 for i in range(ROWS)],
+    "c": [f"cat_{i % 50}" for i in range(ROWS)],
+    "d": [i * 0.25 for i in range(ROWS)],
+    "e": [None if i % 2 == 0 else i * 1.5 for i in range(ROWS)],
+    "f": [i * 3 for i in range(ROWS)],
+})
+
+for _ in range(WARMUP):
+    n = 0
+    for _name, _col in df.items():
+        n += 1
+    for _name, _col in df.iteritems() if hasattr(df, "iteritems") else df.items():
+        n += 1
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    n = 0
+    for _name, _col in df.items():
+        n += 1
+    for _name, _col in df.iteritems() if hasattr(df, "iteritems") else df.items():
+        n += 1
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "dataframe_items",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_iterrows.py b/benchmarks/pandas/bench_dataframe_iterrows.py
new file mode 100644
index 00000000..97ea99e0
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_iterrows.py
@@ -0,0 +1,37 @@
+"""Benchmark: DataFrame.iterrows() — iterate over (label, Series) pairs on a 3k-row DataFrame."""
+import json
+import time
+import pandas as pd
+
+ROWS = 3_000
+WARMUP = 5
+ITERATIONS = 30
+
+df = pd.DataFrame({
+    "a": [float(i) for i in range(ROWS)],
+    "b": [i % 100 for i in range(ROWS)],
+    "c": [f"cat_{i % 20}" for i in range(ROWS)],
+    "d": [None if i % 2 == 0 else i * 0.5 for i in range(ROWS)],
+    "e": [i * 2 for i in range(ROWS)],
+})
+
+for _ in range(WARMUP):
+    n = 0
+    for _label, _row in df.iterrows():
+        n += 1
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    n = 0
+    for _label, _row in df.iterrows():
+        n += 1
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "dataframe_iterrows",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_groupby_sum_many_groups.py b/benchmarks/pandas/bench_groupby_sum_many_groups.py
new file mode 100644
index 00000000..2d2c93e5
--- /dev/null
+++ b/benchmarks/pandas/bench_groupby_sum_many_groups.py
@@ -0,0 +1,32 @@
+"""Benchmark: DataFrame.groupby().sum() with 1000 groups on a 100k-row DataFrame."""
+import json
+import time
+import pandas as pd
+
+ROWS = 100_000
+N_GROUPS = 1_000
+WARMUP = 3
+ITERATIONS = 10
+
+df = pd.DataFrame({
+    "key": [f"g{i % N_GROUPS}" for i in range(ROWS)],
+    "val1": [i * 0.5 for i in range(ROWS)],
+    "val2": [i % 200 for i in range(ROWS)],
+})
+
+for _ in range(WARMUP):
+    df.groupby("key").sum()
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    df.groupby("key").sum()
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "groupby_sum_many_groups",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_grouper_class.py b/benchmarks/pandas/bench_grouper_class.py
new file mode 100644
index 00000000..7a255c9f
--- /dev/null
+++ b/benchmarks/pandas/bench_grouper_class.py
@@ -0,0 +1,44 @@
+"""Benchmark: pd.Grouper construction and isinstance checks — 50k iterations."""
+import json
+import time
+
+import pandas as pd
+
+WARMUP = 5
+ITERATIONS = 50_000
+
+
+def run_groupers() -> None:
+    g1 = pd.Grouper(key="col_a")
+    g2 = pd.Grouper(key="date", sort=True)
+    g3 = pd.Grouper(key="category", dropna=False)
+
+    isinstance(g1, pd.Grouper)
+    isinstance(g2, pd.Grouper)
+    isinstance(g3, pd.Grouper)
+    isinstance("not_a_grouper", pd.Grouper)
+    isinstance(42, pd.Grouper)
+
+    str(g1)
+    str(g2)
+    str(g3)
+
+
+for _ in range(WARMUP):
+    run_groupers()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_groupers()
+total = (time.perf_counter() - start) * 1000
+
+print(
+    json.dumps(
+        {
+            "function": "grouper_class",
+            "mean_ms": total / ITERATIONS,
+            "iterations": ITERATIONS,
+            "total_ms": total,
+        }
+    )
+)
diff --git a/benchmarks/pandas/bench_merge_ordered_by.py b/benchmarks/pandas/bench_merge_ordered_by.py
new file mode 100644
index 00000000..4d1b959b
--- /dev/null
+++ b/benchmarks/pandas/bench_merge_ordered_by.py
@@ -0,0 +1,43 @@
+"""Benchmark: pd.merge_ordered with left_by grouping — two 3k-row DataFrames, 10 groups."""
+import json
+import time
+
+import pandas as pd
+
+N = 3_000
+GROUPS = 10
+PER_GROUP = N // GROUPS
+WARMUP = 2
+ITERATIONS = 8
+
+grp_left = [f"g{g}" for g in range(GROUPS) for _ in range(PER_GROUP)]
+t_left = [j * 2 for _ in range(GROUPS) for j in range(PER_GROUP)]
+v1 = [g * PER_GROUP + j for g in range(GROUPS) for j in range(PER_GROUP)]
+
+grp_right = [f"g{g}" for g in range(GROUPS) for _ in range(PER_GROUP)]
+t_right = [j * 3 for _ in range(GROUPS) for j in range(PER_GROUP)]
+v2 = [g * PER_GROUP + j for g in range(GROUPS) for j in range(PER_GROUP)]
+
+df1 = pd.DataFrame({"grp": grp_left, "t": t_left, "val1": v1})
+df2 = pd.DataFrame({"grp": grp_right, "t": t_right, "val2": v2})
+
+# pd.merge_ordered raises ValueError ("Can only group either left or right
+# frames") when both left_by and right_by are passed, so only left_by is used.
+for _ in range(WARMUP):
+    pd.merge_ordered(df1, df2, on="t", left_by="grp")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge_ordered(df1, df2, on="t", left_by="grp")
+total = (time.perf_counter() - start) * 1000
+
+print(
+    json.dumps(
+        {
+            "function": "merge_ordered_by",
+            "mean_ms": total / ITERATIONS,
+            "iterations": ITERATIONS,
+            "total_ms": total,
+        }
+    )
+)
diff --git a/benchmarks/pandas/bench_merge_ordered_ffill.py b/benchmarks/pandas/bench_merge_ordered_ffill.py
new file mode 100644
index 00000000..7a325410
--- /dev/null
+++ b/benchmarks/pandas/bench_merge_ordered_ffill.py
@@ -0,0 +1,36 @@
+"""Benchmark: pd.merge_ordered with fill_method='ffill' — two 5k-row DataFrames."""
+import json
+import time
+
+import pandas as pd
+
+N = 5_000
+WARMUP = 2
+ITERATIONS = 8
+
+keys1 = list(range(0, N * 2, 2))
+vals1 = [i * 1.0 for i in range(N)]
+keys2 = list(range(0, N * 3, 3))
+vals2 = [i * 2.0 for i in range(N)]
+
+df1 = pd.DataFrame({"key": keys1, "val1": vals1})
+df2 = pd.DataFrame({"key": keys2, "val2": vals2})
+
+for _ in range(WARMUP):
+    pd.merge_ordered(df1, df2, on="key", fill_method="ffill")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge_ordered(df1, df2, on="key", fill_method="ffill")
+total = (time.perf_counter() - start) * 1000
+
+print(
+    json.dumps(
+        {
+            "function": "merge_ordered_ffill",
+            "mean_ms": total / ITERATIONS,
+            "iterations": ITERATIONS,
+            "total_ms": total,
+        }
+    )
+)
diff --git a/benchmarks/pandas/bench_series_str_replace_regex.py b/benchmarks/pandas/bench_series_str_replace_regex.py
new file mode 100644
index 00000000..7d221e45
--- /dev/null
+++ b/benchmarks/pandas/bench_series_str_replace_regex.py
@@ -0,0 +1,28 @@
+"""Benchmark: Series.str.replace() with a regex pattern on 50k strings."""
+import json
+import time
+import pandas as pd
+
+ROWS = 50_000
+WARMUP = 5
+ITERATIONS = 30
+
+data = [f"item_{i % 1000}_val{i % 50}" for i in range(ROWS)]
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+    s.str.replace(r"[0-9]+", "#", regex=True)
+
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    s.str.replace(r"[0-9]+", "#", regex=True)
+    times.append((time.perf_counter() - t0) * 1000)
+
+total = sum(times)
+print(json.dumps({
+    "function": "series_str_replace_regex",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/tsb/bench_concat_many_frames.ts b/benchmarks/tsb/bench_concat_many_frames.ts
new file mode 100644
index 00000000..e4be77e6
--- /dev/null
+++ b/benchmarks/tsb/bench_concat_many_frames.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: concat() with 20 DataFrames — many-frame concatenation on 100k total rows.
+ * Outputs JSON: {"function": "concat_many_frames", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, concat } from "../../src/index.ts";
+
+const N_FRAMES = 20;
+const ROWS_EACH = 5_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const frames = Array.from({ length: N_FRAMES }, (_, f) =>
+  DataFrame.fromColumns({
+    a: Array.from({ length: ROWS_EACH }, (_, i) => (f * ROWS_EACH + i) * 1.0),
+    b: Array.from({ length: ROWS_EACH }, (_, i) => (f * ROWS_EACH + i) % 100),
+    c: Array.from({ length: ROWS_EACH }, (_, i) => `cat_${i % 20}`),
+  }),
+);
+
+for (let i = 0; i < WARMUP; i++) {
+  concat(frames);
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  concat(frames);
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "concat_many_frames",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_from_records.ts b/benchmarks/tsb/bench_dataframe_from_records.ts
new file mode 100644
index 00000000..14b447bb
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_from_records.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: DataFrame.fromRecords() — construct a DataFrame from an array of record objects.
+ * Outputs JSON: {"function": "dataframe_from_records", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame } from "../../src/index.ts";
+
+const ROWS = 20_000;
+const WARMUP = 5;
+const ITERATIONS = 20;
+
+const records = Array.from({ length: ROWS }, (_, i) => ({
+  id: i,
+  value: i * 1.5,
+  category: `cat_${i % 50}`,
+  score: i % 2 === 0 ? null : i * 0.1,
+  rank: i % 100,
+}));
+
+for (let i = 0; i < WARMUP; i++) {
+  DataFrame.fromRecords(records);
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  DataFrame.fromRecords(records);
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "dataframe_from_records",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_items.ts b/benchmarks/tsb/bench_dataframe_items.ts
new file mode 100644
index 00000000..4fb8be97
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_items.ts
@@ -0,0 +1,51 @@
+/**
+ * Benchmark: DataFrame.items() / iteritems() — iterate over (columnName, Series) pairs.
+ * Outputs JSON: {"function": "dataframe_items", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame } from "../../src/index.ts";
+
+const ROWS = 50_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+const df = DataFrame.fromColumns({
+  a: Array.from({ length: ROWS }, (_, i) => i * 1.0),
+  b: Array.from({ length: ROWS }, (_, i) => i % 500),
+  c: Array.from({ length: ROWS }, (_, i) => `cat_${i % 50}`),
+  d: Array.from({ length: ROWS }, (_, i) => i * 0.25),
+  e: Array.from({ length: ROWS }, (_, i) => i % 2 === 0 ? null : i * 1.5),
+  f: Array.from({ length: ROWS }, (_, i) => i * 3),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+  let n = 0;
+  for (const [_name, _col] of df.items()) {
+    n++;
+  }
+  for (const [_name, _col] of df.iteritems()) {
+    n++;
+  }
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  let n = 0;
+  for (const [_name, _col] of df.items()) {
+    n++;
+  }
+  for (const [_name, _col] of df.iteritems()) {
+    n++;
+  }
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "dataframe_items",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_iterrows.ts b/benchmarks/tsb/bench_dataframe_iterrows.ts
new file mode 100644
index 00000000..196df794
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_iterrows.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark: DataFrame.iterrows() — iterate over (label, rowSeries) pairs on a 3k-row DataFrame.
+ * Outputs JSON: {"function": "dataframe_iterrows", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame } from "../../src/index.ts";
+
+const ROWS = 3_000;
+const WARMUP = 5;
+const ITERATIONS = 30;
+
+const df = DataFrame.fromColumns({
+  a: Array.from({ length: ROWS }, (_, i) => i * 1.0),
+  b: Array.from({ length: ROWS }, (_, i) => i % 100),
+  c: Array.from({ length: ROWS }, (_, i) => `cat_${i % 20}`),
+  d: Array.from({ length: ROWS }, (_, i) => (i % 2 === 0 ? null : i * 0.5)),
+  e: Array.from({ length: ROWS }, (_, i) => i * 2),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+  let n = 0;
+  for (const [_label, _row] of df.iterrows()) {
+    n++;
+  }
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  let n = 0;
+  for (const [_label, _row] of df.iterrows()) {
+    n++;
+  }
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "dataframe_iterrows",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_groupby_sum_many_groups.ts b/benchmarks/tsb/bench_groupby_sum_many_groups.ts
new file mode 100644
index 00000000..a539d574
--- /dev/null
+++ b/benchmarks/tsb/bench_groupby_sum_many_groups.ts
@@ -0,0 +1,37 @@
+/**
+ * Benchmark: DataFrameGroupBy.sum() with 1000 groups on a 100k-row DataFrame.
+ * Outputs JSON: {"function": "groupby_sum_many_groups", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame } from "../../src/index.ts";
+
+const ROWS = 100_000;
+const N_GROUPS = 1_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+const df = DataFrame.fromColumns({
+  key: Array.from({ length: ROWS }, (_, i) => `g${i % N_GROUPS}`),
+  val1: Array.from({ length: ROWS }, (_, i) => i * 0.5),
+  val2: Array.from({ length: ROWS }, (_, i) => i % 200),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+  df.groupby("key").sum();
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  df.groupby("key").sum();
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "groupby_sum_many_groups",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_grouper_class.ts b/benchmarks/tsb/bench_grouper_class.ts
new file mode 100644
index 00000000..6a3cb20e
--- /dev/null
+++ b/benchmarks/tsb/bench_grouper_class.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark: Grouper class — construction, predicates, and isGrouper on 50k iterations.
+ * Outputs JSON: {"function": "grouper_class", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Grouper, isGrouper } from "../../src/index.ts";
+
+const WARMUP = 5;
+const ITERATIONS = 50_000;
+
+function runGroupers(): void {
+  const g1 = new Grouper({ key: "col_a" });
+  const g2 = new Grouper({ key: "date", sort: true });
+  const g3 = new Grouper({ key: "category", dropna: false });
+
+  isGrouper(g1);
+  isGrouper(g2);
+  isGrouper(g3);
+  isGrouper("not_a_grouper");
+  isGrouper(42);
+
+  g1.isKeyGrouper();
+  g2.isKeyGrouper();
+  g3.isKeyGrouper();
+  g1.isLevelGrouper();
+
+  g1.toString();
+  g2.toString();
+  g3.toString();
+}
+
+for (let i = 0; i < WARMUP; i++) runGroupers();
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) runGroupers();
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "grouper_class",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_merge_ordered_by.ts b/benchmarks/tsb/bench_merge_ordered_by.ts
new file mode 100644
index 00000000..e6a661e7
--- /dev/null
+++ b/benchmarks/tsb/bench_merge_ordered_by.ts
@@ -0,0 +1,58 @@
+/**
+ * Benchmark: mergeOrdered with left_by grouping — two 3k-row DataFrames, 10 groups.
+ * Outputs JSON: {"function": "merge_ordered_by", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, mergeOrdered } from "../../src/index.ts";
+
+const N = 3_000;
+const GROUPS = 10;
+const PER_GROUP = Math.floor(N / GROUPS);
+const WARMUP = 2;
+const ITERATIONS = 8;
+
+// Build sorted data by (grp, t)
+const grpLeft: string[] = [];
+const tLeft: number[] = [];
+const v1: number[] = [];
+for (let g = 0; g < GROUPS; g++) {
+  for (let j = 0; j < PER_GROUP; j++) {
+    grpLeft.push(`g${g}`);
+    tLeft.push(j * 2);
+    v1.push(g * PER_GROUP + j);
+  }
+}
+
+const grpRight: string[] = [];
+const tRight: number[] = [];
+const v2: number[] = [];
+for (let g = 0; g < GROUPS; g++) {
+  for (let j = 0; j < PER_GROUP; j++) {
+    grpRight.push(`g${g}`);
+    tRight.push(j * 3);
+    v2.push(g * PER_GROUP + j);
+  }
+}
+
+const df1 = DataFrame.fromColumns({ grp: grpLeft, t: tLeft, val1: v1 });
+const df2 = DataFrame.fromColumns({ grp: grpRight, t: tRight, val2: v2 });
+
+// NOTE(review): pandas' merge_ordered rejects left_by combined with right_by;
+// confirm tsb's mergeOrdered accepts both, otherwise pass only left_by.
+for (let i = 0; i < WARMUP; i++) {
+  mergeOrdered(df1, df2, { on: "t", left_by: "grp", right_by: "grp" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  mergeOrdered(df1, df2, { on: "t", left_by: "grp", right_by: "grp" });
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "merge_ordered_by",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_merge_ordered_ffill.ts b/benchmarks/tsb/bench_merge_ordered_ffill.ts
new file mode 100644
index 00000000..9efebf0e
--- /dev/null
+++ b/benchmarks/tsb/bench_merge_ordered_ffill.ts
@@ -0,0 +1,37 @@
+/**
+ * Benchmark: mergeOrdered with fill_method "ffill" — two 5k-row DataFrames with interleaved keys.
+ * Outputs JSON: {"function": "merge_ordered_ffill", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, mergeOrdered } from "../../src/index.ts";
+
+const N = 5_000;
+const WARMUP = 2;
+const ITERATIONS = 8;
+
+// Even-numbered keys on left, multiples-of-3 on right → many gaps filled by ffill
+const keys1 = Array.from({ length: N }, (_, i) => i * 2);
+const vals1 = Array.from({ length: N }, (_, i) => i * 1.0);
+const keys2 = Array.from({ length: N }, (_, i) => i * 3);
+const vals2 = Array.from({ length: N }, (_, i) => i * 2.0);
+
+const df1 = DataFrame.fromColumns({ key: keys1, val1: vals1 });
+const df2 = DataFrame.fromColumns({ key: keys2, val2: vals2 });
+
+for (let i = 0; i < WARMUP; i++) {
+  mergeOrdered(df1, df2, { on: "key", fill_method: "ffill" });
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  mergeOrdered(df1, df2, { on: "key", fill_method: "ffill" });
+}
+const total = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "merge_ordered_ffill",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);
diff --git a/benchmarks/tsb/bench_series_str_replace_regex.ts b/benchmarks/tsb/bench_series_str_replace_regex.ts
new file mode 100644
index 00000000..f37a4aae
--- /dev/null
+++ b/benchmarks/tsb/bench_series_str_replace_regex.ts
@@ -0,0 +1,35 @@
+/**
+ * Benchmark: Series.str.replace() with a regex pattern on 50k strings.
+ * Outputs JSON: {"function": "series_str_replace_regex", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series } from "../../src/index.ts";
+
+const ROWS = 50_000;
+const WARMUP = 5;
+const ITERATIONS = 30;
+
+const data = Array.from({ length: ROWS }, (_, i) => `item_${i % 1000}_val${i % 50}`);
+const s = new Series({ data });
+
+// The `g` flag requests replacement of every digit run, as pandas' regex
+// str.replace does by default — TODO confirm tsb honours the flag.
+for (let i = 0; i < WARMUP; i++) {
+  s.str.replace(/[0-9]+/g, "#");
+}
+
+const times: number[] = [];
+for (let i = 0; i < ITERATIONS; i++) {
+  const t0 = performance.now();
+  s.str.replace(/[0-9]+/g, "#");
+  times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0);
+
+console.log(
+  JSON.stringify({
+    function: "series_str_replace_regex",
+    mean_ms: total / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms: total,
+  }),
+);