Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ add_custom_command(
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/indexing_ext.pyx"
VERBATIM)

# Cythonize groupby_ext.pyx into C. OUTPUT is a relative path, so the
# generated file lands in the current binary dir, mirroring the sibling
# blosc2_ext/indexing_ext rules above.
add_custom_command(
  OUTPUT groupby_ext.c
  COMMAND Python::Interpreter -m cython
          "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/groupby_ext.pyx" --output-file groupby_ext.c
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/blosc2/groupby_ext.pyx"
  VERBATIM)

# ...and add it to the target
Python_add_library(blosc2_ext MODULE blosc2_ext.c WITH_SOABI)
target_sources(blosc2_ext PRIVATE src/blosc2/matmul_kernels.c)
Expand All @@ -59,10 +66,12 @@ if(UNIX)
target_link_libraries(blosc2_ext PRIVATE ${CMAKE_DL_LIBS})
endif()
Python_add_library(indexing_ext MODULE indexing_ext.c WITH_SOABI)
Python_add_library(groupby_ext MODULE groupby_ext.c WITH_SOABI)

# We need to link against NumPy
target_link_libraries(blosc2_ext PRIVATE Python::NumPy)
target_link_libraries(indexing_ext PRIVATE Python::NumPy)
target_link_libraries(groupby_ext PRIVATE Python::NumPy)

# Fetch and build miniexpr library
include(FetchContent)
Expand Down Expand Up @@ -99,6 +108,7 @@ endif()

target_compile_features(blosc2_ext PRIVATE c_std_11)
target_compile_features(indexing_ext PRIVATE c_std_11)
target_compile_features(groupby_ext PRIVATE c_std_11)
if(WIN32 AND CMAKE_C_COMPILER_ID STREQUAL "Clang")
execute_process(
COMMAND "${CMAKE_C_COMPILER}" -print-resource-dir
Expand Down Expand Up @@ -173,7 +183,7 @@ endif()

# Python extension -> site-packages/blosc2
install(
TARGETS blosc2_ext indexing_ext
TARGETS blosc2_ext indexing_ext groupby_ext
LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/blosc2
)

Expand Down
205 changes: 205 additions & 0 deletions bench/ctable/bench_nested_filter_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################

"""Benchmark nested leaf filter/index performance vs flat columns.

Compares a CTable with flat column names against an equivalent one that uses
dotted nested column names (physically stored under hierarchical _cols/ paths).
Both tables hold the same data; each filter/index/aggregate operation is timed
on both to show the overhead (or absence thereof) introduced by the nested layout.
"""

from __future__ import annotations

import argparse
import gc
import time
from dataclasses import dataclass

import numpy as np

import blosc2


# ---------------------------------------------------------------------------
# Schema helpers
# ---------------------------------------------------------------------------


@dataclass
class FlatRow:
    """Schema for the flat-layout table: five float64 columns with
    underscore-separated names. ``blosc2.field(...)`` supplies per-column
    metadata; ``payment_fare`` is declared non-negative (``ge=0``)."""

    trip_begin_lon: float = blosc2.field(blosc2.float64())
    trip_begin_lat: float = blosc2.field(blosc2.float64())
    trip_end_lon: float = blosc2.field(blosc2.float64())
    trip_end_lat: float = blosc2.field(blosc2.float64())
    payment_fare: float = blosc2.field(blosc2.float64(ge=0))


@dataclass
class NestedRow:
    """Same physical columns as FlatRow but accessed via dotted names after creation.

    The columns start out with underscore names identical to FlatRow's;
    ``_build_nested`` renames them to dotted paths (e.g. ``trip.begin.lon``)
    after the table is populated.
    """

    trip_begin_lon: float = blosc2.field(blosc2.float64())
    trip_begin_lat: float = blosc2.field(blosc2.float64())
    trip_end_lon: float = blosc2.field(blosc2.float64())
    trip_end_lat: float = blosc2.field(blosc2.float64())
    payment_fare: float = blosc2.field(blosc2.float64(ge=0))


def _build_data(n: int) -> dict:
rng = np.random.default_rng(42)
return {
"trip_begin_lon": rng.uniform(-88.0, -87.5, n).astype(np.float64),
"trip_begin_lat": rng.uniform(41.6, 42.0, n).astype(np.float64),
"trip_end_lon": rng.uniform(-88.0, -87.5, n).astype(np.float64),
"trip_end_lat": rng.uniform(41.6, 42.0, n).astype(np.float64),
"payment_fare": rng.uniform(3.0, 50.0, n).astype(np.float64),
}


def _build_flat(data: dict, n: int) -> "blosc2.CTable":
    """Build a CTable using the flat (underscore-named) FlatRow schema and
    populate it with the given column arrays."""
    table = blosc2.CTable(FlatRow, expected_size=n)
    table.extend(data)
    return table


def _build_nested(data: dict, n: int) -> "blosc2.CTable":
    """Build a CTable from NestedRow, then rename every column to its dotted
    nested name so the data is physically stored under hierarchical paths."""
    table = blosc2.CTable(NestedRow, expected_size=n)
    table.extend(data)
    # Underscore name -> dotted nested name, applied in the original order.
    renames = (
        ("trip_begin_lon", "trip.begin.lon"),
        ("trip_begin_lat", "trip.begin.lat"),
        ("trip_end_lon", "trip.end.lon"),
        ("trip_end_lat", "trip.end.lat"),
        ("payment_fare", "payment.fare"),
    )
    for old_name, new_name in renames:
        table.rename_column(old_name, new_name)
    return table


# ---------------------------------------------------------------------------
# Timing helper
# ---------------------------------------------------------------------------


def _timeit(fn, repeats: int = 5) -> float:
gc.collect()
times = []
for _ in range(repeats):
t0 = time.perf_counter()
fn()
times.append(time.perf_counter() - t0)
return min(times)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------


def main() -> None:
    """Build a flat and a nested CTable with identical data, then time the
    same filter/index/aggregate operations on both and print a comparison
    table (flat ms, nested ms, nested/flat ratio)."""
    p = argparse.ArgumentParser(description="Benchmark nested vs flat column filter/index/aggregate")
    p.add_argument("--rows", type=int, default=1_000_000, help="Number of rows (default: 1M)")
    p.add_argument("--repeats", type=int, default=5, help="Timing repeats (default: 5)")
    args = p.parse_args()

    N = args.rows
    R = args.repeats

    print(f"Building tables with {N:,} rows …")
    data = _build_data(N)
    flat_data = data.copy()  # flat uses underscore names
    # Same underlying arrays for the nested table — both tables share data,
    # so any timing difference comes from the column layout alone.
    nested_data = {
        "trip_begin_lon": data["trip_begin_lon"],
        "trip_begin_lat": data["trip_begin_lat"],
        "trip_end_lon": data["trip_end_lon"],
        "trip_end_lat": data["trip_end_lat"],
        "payment_fare": data["payment_fare"],
    }

    tf = _build_flat(flat_data, N)
    tn = _build_nested(nested_data, N)
    print(f"  flat col_names: {tf.col_names}")
    print(f"  nested col_names: {tn.col_names}")
    print()

    # Build indexes on the fare column for index-accelerated queries
    print("Building indexes …")
    tf.create_index("payment_fare")
    tn.create_index("payment.fare")
    print()

    header = f"{'Operation':<45} {'flat (ms)':>12} {'nested (ms)':>13} {'ratio':>8}"
    print(header)
    print("-" * len(header))

    def bench(label, flat_fn, nested_fn):
        # Time each variant (best of R runs, converted to ms) and print one
        # row; ratio > 1 means the nested layout is slower.
        t_flat = _timeit(flat_fn, R) * 1000
        t_nested = _timeit(nested_fn, R) * 1000
        ratio = t_nested / t_flat if t_flat > 0 else float("nan")
        print(f"{label:<45} {t_flat:>12.3f} {t_nested:>13.3f} {ratio:>8.3f}x")

    bench(
        "where (string expr, full scan)",
        lambda: tf.where("payment_fare > 20"),
        lambda: tn.where("payment.fare > 20"),
    )

    bench(
        "where (string expr, full scan, nrows)",
        lambda: tf.where("payment_fare > 20").nrows,
        lambda: tn.where("payment.fare > 20").nrows,
    )

    bench(
        "where (LazyExpr, full scan)",
        lambda: tf.where(tf["payment_fare"] > 20),
        lambda: tn.where(tn["payment.fare"] > 20),
    )

    # NOTE(review): this bench is byte-identical to "where (string expr, full
    # scan, nrows)" above, and the indexes were created before any timing —
    # so both runs take the same path. Confirm whether a distinct
    # index-vs-scan comparison was intended.
    bench(
        "where (auto index-accelerated, nrows)",
        lambda: tf.where("payment_fare > 20").nrows,
        lambda: tn.where("payment.fare > 20").nrows,
    )

    bench(
        "column mean (full scan)",
        lambda: tf["payment_fare"].mean(),
        lambda: tn["payment.fare"].mean(),
    )

    bench(
        "column sum (full scan)",
        lambda: tf["payment_fare"].sum(),
        lambda: tn["payment.fare"].sum(),
    )

    bench(
        "column min (full scan)",
        lambda: tf["trip_begin_lon"].min(),
        lambda: tn["trip.begin.lon"].min(),
    )

    bench(
        "multi-column where (string expr, nrows)",
        lambda: tf.where("trip_begin_lon > -87.7 and payment_fare > 10").nrows,
        lambda: tn.where("trip.begin.lon > -87.7 and payment.fare > 10").nrows,
    )

    bench(
        "sort_by (single leaf)",
        lambda: tf.sort_by("payment_fare"),
        lambda: tn.sort_by("payment.fare"),
    )

    print()
    print("ratio < 1 means nested is faster; ratio > 1 means flat is faster.")
    print("Ratios close to 1.0 indicate the nested path adds negligible overhead.")


# Run the benchmark when executed as a script.
if __name__ == "__main__":
    main()
Loading
Loading