From bff2e437711f06f81d59a77b08520e8f8d3c629a Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 12:44:22 +0100 Subject: [PATCH 01/17] DOC-6763 verify tool for notebook generation --- build/jupyterize/verify.py | 254 +++++++++++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 build/jupyterize/verify.py diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py new file mode 100644 index 0000000000..4c650424d9 --- /dev/null +++ b/build/jupyterize/verify.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +verify.py - PROTOTYPE notebook verification harness (Python only). + +Takes a Redis docs code-example source file, builds a *test notebook* in which +the REMOVE_START/END blocks (containing the real asserts) are kept as tagged +cells, executes that notebook inside the real BinderHub base image against the +bundled Redis, and reports pass/fail. The asserts are the oracle. + +It can also emit the *shipped* notebook (test cells stripped) so you can see +exactly what would land in binder-launchers. + +Host requirements: Docker + python3 stdlib only (no nbformat needed locally). +The base image supplies the kernel, redis-py, and redis-server. + +Usage: + python build/jupyterize/verify.py SOURCE.py [--image IMAGE] [--mode kernel|script] [--ship out/demo.ipynb] + +This is a prototype to validate the loop end-to-end; the parsing logic would +later fold into jupyterize proper as a "test mode". +""" + +import argparse +import json +import os +import subprocess +import sys +import tempfile + +# Default = the exact digest the python-* launcher branches pin today. +DEFAULT_IMAGE = ( + "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" + "binder-python-base@sha256:" + "d28356e3f85b5d41c8324fcec7161b3a268a287b9025b590925829fda9aa71c1" +) + +# Markers (Python comment prefix only, for this prototype). 
P = "#"
EXAMPLE, BINDER_ID, KERNEL_NAME = "EXAMPLE", "BINDER_ID", "KERNEL_NAME"
HIDE_START, HIDE_END = "HIDE_START", "HIDE_END"
REMOVE_START, REMOVE_END = "REMOVE_START", "REMOVE_END"
STEP_START, STEP_END = "STEP_START", "STEP_END"


def _is(line, marker):
    """Return True if *line* is the comment marker *marker*.

    Accepts ``# MARKER`` and ``#MARKER``, optionally followed by an argument
    that is separated by whitespace or a colon (``# STEP_START name``,
    ``# EXAMPLE: name``).  A longer identifier that merely starts with the
    marker text (``# STEP_START_X``) does not match.
    """
    s = line.strip()
    if not s.startswith(P):
        return False
    rest = s[len(P):].lstrip()
    if not rest.startswith(marker):
        return False
    tail = rest[len(marker):]
    # Require a separator so "STEP_START" never matches "STEP_STARTED".
    return not tail or tail[0].isspace() or tail[0] == ":"


def _marker_arg(line, marker):
    """Return the text that follows *marker* on *line*, minus a leading colon."""
    return line.split(marker, 1)[1].strip().lstrip(":").strip()


def parse_cells(path):
    """Parse a source file into ordered cells.

    Returns a list of dicts: {"source": str, "step": str|None, "test": bool}.
      - STEP blocks   -> a cell carrying step metadata
      - REMOVE blocks -> a cell tagged test=True (kept for verification,
                         stripped on ship)
      - everything else (incl. HIDE content) -> context cells (e.g. setup)

    EXAMPLE / BINDER_ID / KERNEL_NAME header lines and the HIDE markers
    themselves are dropped.  Buffers that contain only blank lines produce no
    cell.  Code already collected is never discarded: a REMOVE block opened
    inside a STEP first emits the step code gathered so far (so the asserts
    land *after* the code they check), and a STEP or REMOVE block left open at
    end of file is still emitted.

    Raises:
        OSError: if *path* cannot be opened.
    """
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()

    cells = []
    ctx, step_buf, rem_buf = [], [], []
    in_step, step_name, in_remove = False, None, False

    def emit(buf, step=None, test=False):
        # Turn a buffer into a cell unless it is blank, then reset it in place.
        if any(ln.strip() for ln in buf):
            cells.append({"source": "".join(buf).strip("\n"),
                          "step": step, "test": test})
        buf.clear()

    for line in lines:
        if _is(line, EXAMPLE) or _is(line, BINDER_ID) or _is(line, KERNEL_NAME):
            continue
        if _is(line, REMOVE_START):
            # A nested REMOVE_START keeps accumulating instead of resetting.
            if not in_remove:
                emit(ctx)
                emit(step_buf, step=step_name)
                in_remove = True
            continue
        if _is(line, REMOVE_END):
            emit(rem_buf, test=True)
            in_remove = False
            continue
        if in_remove:
            rem_buf.append(line)
            continue
        if _is(line, HIDE_START) or _is(line, HIDE_END):
            continue
        if _is(line, STEP_START):
            emit(ctx)
            # Keep the code of a previous step that was never closed.
            emit(step_buf, step=step_name)
            in_step, step_name = True, _marker_arg(line, STEP_START)
            continue
        if _is(line, STEP_END):
            emit(step_buf, step=step_name)
            in_step, step_name = False, None
            continue
        (step_buf if in_step else ctx).append(line)

    # End of file: flush whatever is still open, in source order.
    emit(step_buf, step=step_name)
    emit(rem_buf, test=True)
    emit(ctx)
    return cells


def to_notebook(cells, include_tests):
    """Build an nbformat-4 notebook dict from parse_cells() output.

    Args:
        cells: list of {"source", "step", "test"} dicts.
        include_tests: when False, cells with test=True are dropped (the
            notebook that ships); when True they are kept and tagged "test".

    Returns:
        A dict in nbformat 4.5 shape with a python3 kernelspec.  Every cell is
        a code cell with empty outputs and a positional ``id``.
    """
    nb_cells = []
    for c in cells:
        if c["test"] and not include_tests:
            continue
        meta = {}
        if c["step"]:
            meta["step"] = c["step"]
        if c["test"]:
            meta["tags"] = ["test"]
        nb_cells.append({
            # nbformat_minor 5 declares cell ids; positional ids are unique.
            "id": f"cell{len(nb_cells)}",
            "cell_type": "code",
            "metadata": meta,
            "source": c["source"],
            "outputs": [],
            "execution_count": None,
        })
    return {
        "cells": nb_cells,
        "metadata": {
            "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
            "language_info": {"name": "python"},
        },
        "nbformat": 4,
        "nbformat_minor": 5,
    }


# Kernel-less driver: exec each code cell in a shared namespace (same ordering
# and shared-state semantics a Jupyter kernel gives), capturing per-cell stdout
# and errors into the same executed-notebook shape report() expects. Used by
# --mode script, which avoids the Jupyter kernel entirely (the kernel's zmq
# handshake hangs under amd64 emulation on Apple Silicon; plain python is fine).
+_DRIVER = r''' +import json, sys, io, contextlib, traceback +nb = json.load(open(sys.argv[1])) +ns = {} +for c in nb["cells"]: + if c["cell_type"] != "code": + continue + src = c["source"] + src = "".join(src) if isinstance(src, list) else src + buf = io.StringIO(); c["outputs"] = [] + try: + with contextlib.redirect_stdout(buf): + exec(compile(src, "", "exec"), ns) + except Exception as e: + if buf.getvalue(): + c["outputs"].append({"output_type": "stream", "name": "stdout", "text": buf.getvalue()}) + c["outputs"].append({"output_type": "error", "ename": type(e).__name__, + "evalue": str(e), "traceback": traceback.format_exc().splitlines()}) + continue + if buf.getvalue(): + c["outputs"].append({"output_type": "stream", "name": "stdout", "text": buf.getvalue()}) +json.dump(nb, open(sys.argv[2], "w")) +''' + +_KERNEL_CMD = ( + "jupyter nbconvert --to notebook --execute --allow-errors " + "--ExecutePreprocessor.startup_timeout=300 --ExecutePreprocessor.timeout=300 " + "--output executed.ipynb test.ipynb >/dev/null 2>&1" +) +_SCRIPT_CMD = "python /work/driver.py /work/test.ipynb /work/executed.ipynb" + +_START_REDIS = ( + "cd /usr/src/redis-src && ./redis-server ./redis.conf --daemonize yes " + ">/dev/null 2>&1 && sleep 1 && cd /work && " +) + + +def execute_in_image(notebook, image, mode): + """Run the notebook inside the base image; return the executed notebook dict.""" + with tempfile.TemporaryDirectory() as d: + with open(os.path.join(d, "test.ipynb"), "w", encoding="utf-8") as f: + json.dump(notebook, f) + if mode == "script": + with open(os.path.join(d, "driver.py"), "w", encoding="utf-8") as f: + f.write(_DRIVER) + script = _START_REDIS + (_SCRIPT_CMD if mode == "script" else _KERNEL_CMD) + cmd = [ + "docker", "run", "--rm", "--platform", "linux/amd64", + "-v", f"{d}:/work", image, "bash", "-c", script, + ] + subprocess.run(cmd, check=True) + with open(os.path.join(d, "executed.ipynb"), encoding="utf-8") as f: + return json.load(f) + + +def report(executed): 
+ """Inspect executed cells; return (ok, failures). Print per-cell summary.""" + failures = [] + for i, c in enumerate(executed["cells"]): + if c["cell_type"] != "code": + continue + tags = c["metadata"].get("tags", []) + label = "TEST" if "test" in tags else (c["metadata"].get("step") or "setup") + err = next((o for o in c.get("outputs", []) if o.get("output_type") == "error"), None) + stdout = "".join( + "".join(o.get("text", "")) for o in c.get("outputs", []) + if o.get("output_type") == "stream" + ).strip() + if err: + failures.append((i, label, err["ename"], err.get("evalue", ""))) + print(f" ✗ cell {i:>2} [{label}] -> {err['ename']}: {err.get('evalue','')}") + else: + mark = "·" if "test" in tags else "✓" + extra = f" stdout: {stdout!r}" if stdout and "test" not in tags else "" + print(f" {mark} cell {i:>2} [{label}]{extra}") + return (not failures), failures + + +def main(): + ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("source", help="example source .py") + ap.add_argument("--image", default=DEFAULT_IMAGE) + ap.add_argument("--mode", choices=["kernel", "script"], default="kernel", + help="kernel: real nbconvert (CI/amd64). 
" + "script: kernel-less exec (local/Apple Silicon).") + ap.add_argument("--ship", metavar="PATH", + help="also write the stripped (shipped) notebook here") + args = ap.parse_args() + + cells = parse_cells(args.source) + n_test = sum(c["test"] for c in cells) + print(f"Parsed {len(cells)} cells ({n_test} test, {len(cells)-n_test} shipped) from {args.source}") + + test_nb = to_notebook(cells, include_tests=True) + img_name = args.image.split('@')[0].split('/')[-1] + print(f"Executing test notebook in {img_name} (mode={args.mode}) ...") + executed = execute_in_image(test_nb, args.image, args.mode) + + ok, failures = report(executed) + + if args.ship: + os.makedirs(os.path.dirname(args.ship) or ".", exist_ok=True) + with open(args.ship, "w", encoding="utf-8") as f: + json.dump(to_notebook(cells, include_tests=False), f, indent=1) + print(f"Wrote shipped notebook -> {args.ship}") + + print() + if ok: + print("RESULT: PASS — notebook executes clean and all asserts hold.") + return 0 + print(f"RESULT: FAIL — {len(failures)} cell(s) errored.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From a3ab04355628f066b7e6c53061935a51dc22090d Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 13:38:36 +0100 Subject: [PATCH 02/17] DOC-6763 local dt_time_series.oy to test notebook generation --- .../redis-py/dt_time_series.py | 517 ++++++++++++++++++ 1 file changed, 517 insertions(+) create mode 100644 local_examples/time_series_tutorial/redis-py/dt_time_series.py diff --git a/local_examples/time_series_tutorial/redis-py/dt_time_series.py b/local_examples/time_series_tutorial/redis-py/dt_time_series.py new file mode 100644 index 0000000000..98a2a923cf --- /dev/null +++ b/local_examples/time_series_tutorial/redis-py/dt_time_series.py @@ -0,0 +1,517 @@ +# EXAMPLE: time_series_tutorial +# HIDE_START +""" +Code samples for time series page: + https://redis.io/docs/latest/develop/data-types/timeseries/ +""" + +import redis + +r = 
redis.Redis(decode_responses=True) +# HIDE_END + +# REMOVE_START +r.delete( + "thermometer:1", "thermometer:2", "thermometer:3", + "rg:1", "rg:2", "rg:3", "rg:4", + "sensor3", + "wind:1", "wind:2", "wind:3", "wind:4", + "hyg:1", "hyg:compacted" +) +# REMOVE_END + +# STEP_START create +res1 = r.ts().create("thermometer:1") +print(res1) # >>> True + +res2 = r.type("thermometer:1") +print(res2) # >>> TSDB-TYPE + +res3 = r.ts().info("thermometer:1") +print(res3) +# >>> {'rules': [], ... 'total_samples': 0, ... +# STEP_END +# REMOVE_START +assert res1 is True +assert res2 == "TSDB-TYPE" +assert res3["total_samples"] == 0 +# REMOVE_END + +# STEP_START create_retention +res4 = r.ts().add("thermometer:2", 1, 10.8, retention_msecs=100) +print(res4) # >>> 1 + +res5 = r.ts().info("thermometer:2") +print(res5) +# >>> {'rules': [], ... 'retention_msecs': 100, ... +# STEP_END +# REMOVE_START +assert res4 == 1 +assert res5["retention_msecs"] == 100 +# REMOVE_END + +# STEP_START create_labels +res6 = r.ts().add( + "thermometer:3", 1, 10.4, + labels={"location": "UK", "type": "Mercury"} +) +print(res6) # >>> 1 + +res7 = r.ts().info("thermometer:3") +print(res7) +# >>> {'rules': [], ... 'labels': {'location': 'UK', 'type': 'Mercury'}, ... +# STEP_END +# REMOVE_START +assert res6 == 1 +assert res7["labels"] == {"location": "UK", "type": "Mercury"} +# REMOVE_END + +# STEP_START madd +res8 = r.ts().madd([ + ("thermometer:1", 1, 9.2), + ("thermometer:1", 2, 9.9), + ("thermometer:2", 2, 10.3) +]) +print(res8) # >>> [1, 2, 2] +# STEP_END +# REMOVE_START +assert res8 == [1, 2, 2] +# REMOVE_END + +# STEP_START get +# The last recorded temperature for thermometer:2 +# was 10.3 at time 2. +res9 = r.ts().get("thermometer:2") +print(res9) # >>> (2, 10.3) +# STEP_END +# REMOVE_START +assert res9 == (2, 10.3) +# REMOVE_END + +# STEP_START range +# Add 5 data points to a time series named "rg:1". 
+res10 = r.ts().create("rg:1") +print(res10) # >>> True + +res11 = r.ts().madd([ + ("rg:1", 0, 18), + ("rg:1", 1, 14), + ("rg:1", 2, 22), + ("rg:1", 3, 18), + ("rg:1", 4, 24), +]) +print(res11) # >>> [0, 1, 2, 3, 4] + +# Retrieve all the data points in ascending order. +res12 = r.ts().range("rg:1", "-", "+") +print(res12) # >>> [(0, 18.0), (1, 14.0), (2, 22.0), (3, 18.0), (4, 24.0)] + +# Retrieve data points up to time 1 (inclusive). +res13 = r.ts().range("rg:1", "-", 1) +print(res13) # >>> [(0, 18.0), (1, 14.0)] + +# Retrieve data points from time 3 onwards. +res14 = r.ts().range("rg:1", 3, "+") +print(res14) # >>> [(3, 18.0), (4, 24.0)] + +# Retrieve all the data points in descending order. +res15 = r.ts().revrange("rg:1", "-", "+") +print(res15) # >>> [(4, 24.0), (3, 18.0), (2, 22.0), (1, 14.0), (0, 18.0)] + +# Retrieve data points up to time 1 (inclusive), but return them +# in descending order. +res16 = r.ts().revrange("rg:1", "-", 1) +print(res16) # >>> [(1, 14.0), (0, 18.0)] +# STEP_END +# REMOVE_START +assert res10 is True +assert res11 == [0, 1, 2, 3, 4] +assert res12 == [(0, 18.0), (1, 14.0), (2, 22.0), (3, 18.0), (4, 24.0)] +assert res13 == [(0, 18.0), (1, 14.0)] +assert res14 == [(3, 18.0), (4, 24.0)] +assert res15 == [(4, 24.0), (3, 18.0), (2, 22.0), (1, 14.0), (0, 18.0)] +assert res16 == [(1, 14.0), (0, 18.0)] +# REMOVE_END + +# STEP_START range_filter +res17 = r.ts().range("rg:1", "-", "+", filter_by_ts=[0, 2, 4]) +print(res17) # >>> [(0, 18.0), (2, 22.0), (4, 24.0)] + +res18 = r.ts().revrange( + "rg:1", "-", "+", + filter_by_ts=[0, 2, 4], + filter_by_min_value=20, + filter_by_max_value=25, +) +print(res18) # >>> [(4, 24.0), (2, 22.0)] + +res19 = r.ts().revrange( + "rg:1", "-", "+", + filter_by_ts=[0, 2, 4], + filter_by_min_value=22, + filter_by_max_value=22, + count=1, +) +print(res19) # >>> [(2, 22.0)] +# STEP_END +# REMOVE_START +assert res17 == [(0, 18.0), (2, 22.0), (4, 24.0)] +assert res18 == [(4, 24.0), (2, 22.0)] +assert res19 == [(2, 22.0)] 
+# REMOVE_END + +# STEP_START query_multi +# Create three new "rg:" time series (two in the US +# and one in the UK, with different units) and add some +# data points. +res20 = r.ts().create( + "rg:2", + labels={"location": "us", "unit": "cm"}, +) +print(res20) # >>> True + +res21 = r.ts().create( + "rg:3", + labels={"location": "us", "unit": "in"}, +) +print(res21) # >>> True + +res22 = r.ts().create( + "rg:4", + labels={"location": "uk", "unit": "mm"}, +) +print(res22) # >>> True + +res23 = r.ts().madd([ + ("rg:2", 0, 1.8), + ("rg:3", 0, 0.9), + ("rg:4", 0, 25), +]) +print(res23) # >>> [0, 0, 0] + +res24 = r.ts().madd([ + ("rg:2", 1, 2.1), + ("rg:3", 1, 0.77), + ("rg:4", 1, 18), +]) +print(res24) # >>> [1, 1, 1] + +res25 = r.ts().madd([ + ("rg:2", 2, 2.3), + ("rg:3", 2, 1.1), + ("rg:4", 2, 21), +]) +print(res25) # >>> [2, 2, 2] + +res26 = r.ts().madd([ + ("rg:2", 3, 1.9), + ("rg:3", 3, 0.81), + ("rg:4", 3, 19), +]) +print(res26) # >>> [3, 3, 3] + +res27 = r.ts().madd([ + ("rg:2", 4, 1.78), + ("rg:3", 4, 0.74), + ("rg:4", 4, 23), +]) +print(res27) # >>> [4, 4, 4] + +# Retrieve the last data point from each US time series. If +# you don't specify any labels, an empty array is returned +# for the labels. +res28 = r.ts().mget(["location=us"]) +print(res28) # >>> [{'rg:2': [{}, 4, 1.78]}, {'rg:3': [{}, 4, 0.74]}] + +# Retrieve the same data points, but include the `unit` +# label in the results. +res29 = r.ts().mget(["location=us"], select_labels=["unit"]) +print(res29) # >>> [{'rg:2': [{'unit': 'cm'}, 4, 1.78]}, {'rg:3': [{'unit': 'in'}, 4, 0.74]}] + +# Retrieve data points up to time 2 (inclusive) from all +# time series that use millimeters as the unit. Include all +# labels in the results. +res30 = r.ts().mrange( + "-", 2, filters=["unit=mm"], with_labels=True +) +print(res30) +# >>> [{'rg:4': [{'location': 'uk', 'unit': 'mm'}, [(0, 25.0),... 
+ +# Retrieve data points from time 1 to time 3 (inclusive) from +# all time series that use centimeters or millimeters as the unit, +# but only return the `location` label. Return the results +# in descending order of timestamp. +res31 = r.ts().mrevrange( + 1, 3, filters=["unit=(cm,mm)"], select_labels=["location"] +) +print(res31) +# >>> [[{'location': 'uk'}, (3, 19.0), (2, 21.0), (1, 18.0)],... +# STEP_END +# REMOVE_START +assert res20 is True +assert res21 is True +assert res22 is True +assert res23 == [0, 0, 0] +assert res24 == [1, 1, 1] +assert res25 == [2, 2, 2] +assert res26 == [3, 3, 3] +assert res27 == [4, 4, 4] +assert res28 == [{'rg:2': [{}, 4, 1.78]}, {'rg:3': [{}, 4, 0.74]}] +assert res29 == [ + {'rg:2': [{'unit': 'cm'}, 4, 1.78]}, + {'rg:3': [{'unit': 'in'}, 4, 0.74]} +] +assert res30 == [ + { + 'rg:4': [ + {'location': 'uk', 'unit': 'mm'}, + [(0, 25), (1, 18.0), (2, 21.0)] + ] + } +] +assert res31 == [ + {'rg:2': [{'location': 'us'}, [(3, 1.9), (2, 2.3), (1, 2.1)]]}, + {'rg:4': [{'location': 'uk'}, [(3, 19.0), (2, 21.0), (1, 18.0)]]} +] +# REMOVE_END + +# STEP_START agg +res32 = r.ts().range( + "rg:2", "-", "+", + aggregation_type="avg", + bucket_size_msec=2 +) +print(res32) +# >>> [(0, 1.9500000000000002), (2, 2.0999999999999996), (4, 1.78)] +# STEP_END +# REMOVE_START +assert res32 == [ + (0, 1.9500000000000002), (2, 2.0999999999999996), + (4, 1.78) +] +# REMOVE_END + +# STEP_START agg_bucket +res33 = r.ts().create("sensor3") +print(res33) # >>> True + +res34 = r.ts().madd([ + ("sensor3", 10, 1000), + ("sensor3", 20, 2000), + ("sensor3", 30, 3000), + ("sensor3", 40, 4000), + ("sensor3", 50, 5000), + ("sensor3", 60, 6000), + ("sensor3", 70, 7000), +]) +print(res34) # >>> [10, 20, 30, 40, 50, 60, 70] + +res35 = r.ts().range( + "sensor3", 10, 70, + aggregation_type="min", + bucket_size_msec=25 +) +print(res35) +# >>> [(0, 1000.0), (25, 3000.0), (50, 5000.0)] +# STEP_END +# REMOVE_START +assert res33 is True +assert res34 == [10, 20, 30, 40, 50, 60, 
70] +assert res35 == [(0, 1000.0), (25, 3000.0), (50, 5000.0)] +# REMOVE_END + +# STEP_START agg_align +res36 = r.ts().range( + "sensor3", 10, 70, + aggregation_type="min", + bucket_size_msec=25, + align="START" +) +print(res36) +# >>> [(10, 1000.0), (35, 4000.0), (60, 6000.0)] +# STEP_END +# REMOVE_START +assert res36 == [(10, 1000.0), (35, 4000.0), (60, 6000.0)] +# REMOVE_END + +# STEP_START agg_multi +res37 = r.ts().create( + "wind:1", + labels={"country": "uk"} +) +print(res37) # >>> True + +res38 = r.ts().create( + "wind:2", + labels={"country": "uk"} +) +print(res38) # >>> True + +res39 = r.ts().create( + "wind:3", + labels={"country": "us"} +) +print(res39) # >>> True + +res40 = r.ts().create( + "wind:4", + labels={"country": "us"} +) +print(res40) # >>> True + +res41 = r.ts().madd([ + ("wind:1", 1, 12), + ("wind:2", 1, 18), + ("wind:3", 1, 5), + ("wind:4", 1, 20), +]) +print(res41) # >>> [1, 1, 1, 1] + +res42 = r.ts().madd([ + ("wind:1", 2, 14), + ("wind:2", 2, 21), + ("wind:3", 2, 4), + ("wind:4", 2, 25), +]) +print(res42) # >>> [2, 2, 2, 2] + +res43 = r.ts().madd([ + ("wind:1", 3, 10), + ("wind:2", 3, 24), + ("wind:3", 3, 8), + ("wind:4", 3, 18), +]) +print(res43) # >>> [3, 3, 3, 3] + +# The result pairs contain the timestamp and the maximum sample value +# for the country at that timestamp. +res44 = r.ts().mrange( + "-", "+", + filters=["country=(us,uk)"], + groupby="country", + reduce="max" +) +print(res44) +# >>> [{'country=uk': [{}, [(1, 18.0), (2, 21.0), (3, 24.0)]]}, ... + +# The result pairs contain the timestamp and the average sample value +# for the country at that timestamp. +res45 = r.ts().mrange( + "-", "+", + filters=["country=(us,uk)"], + groupby="country", + reduce="avg" +) +print(res45) +# >>> [{'country=uk': [{}, [(1, 15.0), (2, 17.5), (3, 17.0)]]}, ... 
+# STEP_END +# REMOVE_START +assert res37 is True +assert res38 is True +assert res39 is True +assert res40 is True +assert res41 == [1, 1, 1, 1] +assert res42 == [2, 2, 2, 2] +assert res43 == [3, 3, 3, 3] +assert res44 == [ + {'country=uk': [{}, [(1, 18.0), (2, 21.0), (3, 24.0)]]}, + {'country=us': [{}, [(1, 20.0), (2, 25.0), (3, 18.0)]]} +] +assert res45 == [ + {'country=uk': [{}, [(1, 15.0), (2, 17.5), (3, 17.0)]]}, + {'country=us': [{}, [(1, 12.5), (2, 14.5), (3, 13.0)]]} +] +# REMOVE_END + +# STEP_START create_compaction +res45 = r.ts().create("hyg:1") +print(res45) # >>> True + +res46 = r.ts().create("hyg:compacted") +print(res46) # >>> True + +res47 = r.ts().createrule("hyg:1", "hyg:compacted", "min", 3) +print(res47) # >>> True + +res48 = r.ts().info("hyg:1") +print(res48.rules) +# >>> [['hyg:compacted', 3, 'MIN', 0]] + +res49 = r.ts().info("hyg:compacted") +print(res49.source_key) # >>> 'hyg:1' +# STEP_END +# REMOVE_START +assert res45 is True +assert res46 is True +assert res47 is True +assert res48.rules == [['hyg:compacted', 3, 'MIN', 0]] +assert res49.source_key == 'hyg:1' +# REMOVE_END + +# STEP_START comp_add +res50 = r.ts().madd([ + ("hyg:1", 0, 75), + ("hyg:1", 1, 77), + ("hyg:1", 2, 78), +]) +print(res50) # >>> [0, 1, 2] + +res51 = r.ts().range("hyg:compacted", "-", "+") +print(res51) # >>> [] + +res52 = r.ts().add("hyg:1", 3, 79) +print(res52) # >>> 3 + +res53 = r.ts().range("hyg:compacted", "-", "+") +print(res53) # >>> [(0, 75.0)] +# STEP_END +# REMOVE_START +assert res50 == [0, 1, 2] +assert res51 == [] +assert res52 == 3 +assert res53 == [(0, 75.0)] +# REMOVE_END + +# STEP_START del +res54 = r.ts().info("thermometer:1") +print(res54.total_samples) # >>> 2 +print(res54.first_timestamp) # >>> 1 +print(res54.last_timestamp) # >>> 2 + +res55 = r.ts().add("thermometer:1", 3, 9.7) +print(res55) # >>> 3 + +res56 = r.ts().info("thermometer:1") +print(res56.total_samples) # >>> 3 +print(res56.first_timestamp) # >>> 1 +print(res56.last_timestamp) # >>> 
3 + +res57 = r.ts().delete("thermometer:1", 1, 2) +print(res57) # >>> 2 + +res58 = r.ts().info("thermometer:1") +print(res58.total_samples) # >>> 1 +print(res58.first_timestamp) # >>> 3 +print(res58.last_timestamp) # >>> 3 + +res59 = r.ts().delete("thermometer:1", 3, 3) +print(res59) # >>> 1 + +res60 = r.ts().info("thermometer:1") +print(res60.total_samples) # >>> 0 +# STEP_END +# REMOVE_START +assert res54.total_samples == 2 +assert res54.first_timestamp == 1 +assert res54.last_timestamp == 2 +assert res55 == 3 +assert res56.total_samples == 3 +assert res56.first_timestamp == 1 +assert res56.last_timestamp == 3 +assert res57 == 2 +assert res58.total_samples == 1 +assert res58.first_timestamp == 3 +assert res58.last_timestamp == 3 +assert res59 == 1 +assert res60.total_samples == 0 +# REMOVE_END From 4c5a1b8266d716c79ba642ee3e0e9095f9cf2675 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 13:47:54 +0100 Subject: [PATCH 03/17] DOC-6763 added Binder ID to time series example --- local_examples/time_series_tutorial/redis-py/dt_time_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/local_examples/time_series_tutorial/redis-py/dt_time_series.py b/local_examples/time_series_tutorial/redis-py/dt_time_series.py index 98a2a923cf..2f52eca166 100644 --- a/local_examples/time_series_tutorial/redis-py/dt_time_series.py +++ b/local_examples/time_series_tutorial/redis-py/dt_time_series.py @@ -1,4 +1,5 @@ # EXAMPLE: time_series_tutorial +# BINDER_ID python-dt-timeseries # HIDE_START """ Code samples for time series page: From dbe25721ba6b34312481c25c73b6ca95bbc641e7 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 14:47:06 +0100 Subject: [PATCH 04/17] DOC-6763 update verify.py script --- build/jupyterize/verify.py | 68 ++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py index 4c650424d9..800f67d604 100644 --- 
a/build/jupyterize/verify.py +++ b/build/jupyterize/verify.py @@ -27,12 +27,40 @@ import sys import tempfile -# Default = the exact digest the python-* launcher branches pin today. -DEFAULT_IMAGE = ( - "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" - "binder-python-base@sha256:" - "d28356e3f85b5d41c8324fcec7161b3a268a287b9025b590925829fda9aa71c1" -) +# Source file extension -> language. Drives base-image selection. +EXT_LANGUAGE = { + ".py": "python", ".js": "node.js", ".go": "go", + ".java": "java", ".cs": "c#", ".php": "php", ".rb": "ruby", ".rs": "rust", +} + +# Language -> BinderHub base image. Pin a digest here once confirmed against the +# real image. Only python is verified today; other languages must be passed via +# --image until their binder-{lang}-base digests are added here. +BASE_IMAGES = { + "python": ( + "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" + "binder-python-base@sha256:" + "d28356e3f85b5d41c8324fcec7161b3a268a287b9025b590925829fda9aa71c1" + ), +} + + +def detect_language(path): + return EXT_LANGUAGE.get(os.path.splitext(path)[1].lower()) + + +def resolve_image(path, override): + """Pick the base image: explicit --image wins, else map from source language.""" + if override: + return override + lang = detect_language(path) + image = BASE_IMAGES.get(lang) + if not image: + raise SystemExit( + f"No base image known for language {lang!r} ({path}). " + f"Pass --image, or add the digest to BASE_IMAGES." + ) + return image # Markers (Python comment prefix only, for this prototype). 
P = "#" @@ -47,6 +75,19 @@ def _is(line, marker): return s == f"{P} {marker}" or s == f"{P}{marker}" or s.startswith(f"{P} {marker} ") +def read_markers(path): + """Pull BINDER_ID / KERNEL_NAME from the source header (for reporting/targeting).""" + info = {"binder_id": None, "kernel_name": None} + with open(path, encoding="utf-8") as f: + for line in f: + s = line.strip() + if s.startswith(f"{P} {BINDER_ID} "): + info["binder_id"] = s.split(BINDER_ID, 1)[1].strip() + elif s.startswith(f"{P} {KERNEL_NAME} "): + info["kernel_name"] = s.split(KERNEL_NAME, 1)[1].strip() + return info + + def parse_cells(path): """Parse a source file into ordered cells. @@ -114,6 +155,7 @@ def to_notebook(cells, include_tests): if c["test"]: meta["tags"] = ["test"] nb_cells.append({ + "id": f"cell{len(nb_cells)}", "cell_type": "code", "metadata": meta, "source": c["source"], @@ -217,7 +259,8 @@ def report(executed): def main(): ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) ap.add_argument("source", help="example source .py") - ap.add_argument("--image", default=DEFAULT_IMAGE) + ap.add_argument("--image", default=None, + help="base image (defaults to the source language's image)") ap.add_argument("--mode", choices=["kernel", "script"], default="kernel", help="kernel: real nbconvert (CI/amd64). 
" "script: kernel-less exec (local/Apple Silicon).") @@ -225,14 +268,19 @@ def main(): help="also write the stripped (shipped) notebook here") args = ap.parse_args() + image = resolve_image(args.source, args.image) + markers = read_markers(args.source) cells = parse_cells(args.source) n_test = sum(c["test"] for c in cells) - print(f"Parsed {len(cells)} cells ({n_test} test, {len(cells)-n_test} shipped) from {args.source}") + print(f"Source: {args.source}") + print(f"Language: {detect_language(args.source)} | " + f"target branch: {markers['binder_id'] or '(no BINDER_ID)'}") + print(f"Parsed {len(cells)} cells ({n_test} test, {len(cells)-n_test} shipped)") test_nb = to_notebook(cells, include_tests=True) - img_name = args.image.split('@')[0].split('/')[-1] + img_name = image.split('@')[0].split('/')[-1] print(f"Executing test notebook in {img_name} (mode={args.mode}) ...") - executed = execute_in_image(test_nb, args.image, args.mode) + executed = execute_in_image(test_nb, image, args.mode) ok, failures = report(executed) From fd49185285d8a0a577685fb7887b3857da04e16b Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 15:04:15 +0100 Subject: [PATCH 05/17] DOC-6763 update jupyterize tool --- build/jupyterize/jupyterize.py | 16 +++++-- build/jupyterize/notebook_builder.py | 5 +++ build/jupyterize/parser.py | 26 +++++++++++- build/jupyterize/test_jupyterize.py | 63 ++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/build/jupyterize/jupyterize.py b/build/jupyterize/jupyterize.py index 6b783f1413..1cc7989aa1 100755 --- a/build/jupyterize/jupyterize.py +++ b/build/jupyterize/jupyterize.py @@ -26,7 +26,7 @@ -def jupyterize(input_file, output_file=None, verbose=False): +def jupyterize(input_file, output_file=None, verbose=False, with_tests=False): """ Convert code example file to Jupyter notebook. 
@@ -34,6 +34,8 @@ def jupyterize(input_file, output_file=None, verbose=False): input_file: Path to input file output_file: Path to output file (default: same name with .ipynb extension) verbose: Enable verbose logging + with_tests: When True, keep REMOVE blocks as cells tagged 'test' (for a + verification/test notebook) instead of dropping them. Returns: str: Path to output file @@ -61,7 +63,7 @@ def jupyterize(input_file, output_file=None, verbose=False): validator.validate_file(input_file, language) # Parse file - parser = FileParser(language) + parser = FileParser(language, keep_tests=with_tests) parsed_blocks = parser.parse(input_file) if not parsed_blocks: @@ -122,13 +124,21 @@ def main(): help='Enable verbose logging' ) + parser.add_argument( + '--with-tests', + action='store_true', + help="Keep REMOVE blocks as cells tagged 'test' (for a verification " + "notebook) instead of dropping them" + ) + args = parser.parse_args() try: output_file = jupyterize( args.input_file, args.output_file, - args.verbose + args.verbose, + args.with_tests ) print(f"Successfully created: {output_file}") return 0 diff --git a/build/jupyterize/notebook_builder.py b/build/jupyterize/notebook_builder.py index 8c0c77b832..97e8e03cf4 100644 --- a/build/jupyterize/notebook_builder.py +++ b/build/jupyterize/notebook_builder.py @@ -121,6 +121,11 @@ def _create_cells(self, parsed_blocks): else: logging.debug(f"Created cell {i} (preamble)") + # Tag test cells (from REMOVE blocks in keep_tests mode) so they can + # be executed for verification and stripped before shipping. 
+ if block.get('is_test'): + cell.metadata['tags'] = ['test'] + cells.append(cell) logging.info(f"Created {len(cells)} notebook cells") diff --git a/build/jupyterize/parser.py b/build/jupyterize/parser.py index 8cc8500ae5..61676c770a 100644 --- a/build/jupyterize/parser.py +++ b/build/jupyterize/parser.py @@ -39,15 +39,20 @@ def _check_marker(line, prefix, marker): class FileParser: """Parses source files with special comment markers.""" - def __init__(self, language): + def __init__(self, language, keep_tests=False): """ Initialize parser for a specific language. Args: language: Programming language (e.g., 'python', 'c#') + keep_tests: When True, REMOVE blocks are emitted as cells tagged + 'test' (in source order) instead of being dropped. Used to + build a test notebook whose asserts can be executed; strip the + tagged cells (e.g. nbconvert TagRemovePreprocessor) to ship. """ self.language = language self.prefix = PREFIXES[language.lower()] + self.keep_tests = keep_tests def parse(self, file_path): """ @@ -68,6 +73,7 @@ def parse(self, file_path): step_name = None step_lines = [] preamble_lines = [] + remove_lines = [] cells = [] seen_step_names = set() @@ -87,6 +93,18 @@ def parse(self, file_path): if _check_marker(line, self.prefix, REMOVE_START): if in_remove: logging.warning(f"Line {line_num}: Nested REMOVE_START detected") + if self.keep_tests: + # Flush pending code first so the test cell lands *after* + # the code it checks (asserts reference its variables). 
+ if in_step and step_lines: + cells.append({'code': ''.join(step_lines), + 'step_name': step_name, 'is_test': False}) + step_lines = [] + elif preamble_lines: + cells.append({'code': ''.join(preamble_lines), + 'step_name': None, 'is_test': False}) + preamble_lines = [] + remove_lines = [] in_remove = True logging.debug(f"Line {line_num}: Entering REMOVE block") continue @@ -94,11 +112,17 @@ def parse(self, file_path): if _check_marker(line, self.prefix, REMOVE_END): if not in_remove: logging.warning(f"Line {line_num}: REMOVE_END without REMOVE_START") + if self.keep_tests and remove_lines: + cells.append({'code': ''.join(remove_lines), + 'step_name': None, 'is_test': True}) + remove_lines = [] in_remove = False logging.debug(f"Line {line_num}: Exiting REMOVE block") continue if in_remove: + if self.keep_tests: + remove_lines.append(line) continue # Skip HIDE markers (but include content) diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index 5563fb532a..3e3eaff92b 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -866,6 +866,7 @@ def main(): test_language_detection() test_basic_conversion() test_hide_remove_blocks() + test_keep_tests_mode() test_javascript_file() # Edge case tests @@ -1185,6 +1186,68 @@ def test_csharp_for_loop_braces(): os.unlink(output_file) +def test_keep_tests_mode(): + """Test that --with-tests keeps REMOVE blocks as tagged 'test' cells.""" + print("\nTesting keep-tests (test notebook) mode...") + + test_content = """# EXAMPLE: test_keep +# HIDE_START +import redis +r = redis.Redis() +# HIDE_END + +# REMOVE_START +r.delete("k") +# REMOVE_END + +# STEP_START setit +res = r.set("k", "v") +print(res) +# STEP_END +# REMOVE_START +assert res is True +# REMOVE_END +""" + + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + f.write(test_content) + test_file = f.name + + ship_file = test_file.replace('.py', '.ipynb') + test_nb_file = 
test_file.replace('.py', '.test.ipynb') + try: + # Default (ship) mode: REMOVE content excluded, no test tags. + jupyterize(test_file, ship_file, verbose=False) + with open(ship_file) as f: + ship = json.load(f) + ship_src = ''.join(''.join(c['source']) for c in ship['cells']) + assert 'assert res is True' not in ship_src + assert 'r.delete' not in ship_src + assert all('test' not in c['metadata'].get('tags', []) for c in ship['cells']) + + # With tests: REMOVE blocks kept as cells tagged 'test', in order. + jupyterize(test_file, test_nb_file, verbose=False, with_tests=True) + with open(test_nb_file) as f: + tnb = json.load(f) + test_cells = [c for c in tnb['cells'] if 'test' in c['metadata'].get('tags', [])] + assert len(test_cells) == 2, f"expected 2 test cells, got {len(test_cells)}" + tnb_src = ''.join(''.join(c['source']) for c in tnb['cells']) + assert 'assert res is True' in tnb_src + assert 'r.delete' in tnb_src + # The shipped notebook is the test notebook minus the tagged cells. + non_test = [c for c in tnb['cells'] if 'test' not in c['metadata'].get('tags', [])] + assert len(non_test) == len(ship['cells']) + # Test cells carry no step metadata (so stripping leaves steps intact). 
+ assert all('step' not in c['metadata'] for c in test_cells) + + print("✓ Keep-tests mode test passed") + + finally: + for p in (test_file, ship_file, test_nb_file): + if os.path.exists(p): + os.unlink(p) + + if __name__ == '__main__': sys.exit(main()) From 1b77675de303dd5a0d881629afaabc00b490da1e Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Thu, 18 Jun 2026 15:30:43 +0100 Subject: [PATCH 06/17] DOC-6763 updates to verify script --- build/jupyterize/verify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py index 800f67d604..0aa943548b 100644 --- a/build/jupyterize/verify.py +++ b/build/jupyterize/verify.py @@ -64,7 +64,7 @@ def resolve_image(path, override): # Markers (Python comment prefix only, for this prototype). P = "#" -EXAMPLE, BINDER_ID, KERNEL_NAME = "EXAMPLE", "BINDER_ID", "KERNEL_NAME" +EXAMPLE, BINDER_ID, KERNEL_NAME = "EXAMPLE:", "BINDER_ID", "KERNEL_NAME" HIDE_START, HIDE_END = "HIDE_START", "HIDE_END" REMOVE_START, REMOVE_END = "REMOVE_START", "REMOVE_END" STEP_START, STEP_END = "STEP_START", "STEP_END" From ce2457842e8d43ced77679e668b39cbb3c98f075 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 10:18:05 +0100 Subject: [PATCH 07/17] DOC-6763 update verify script --- build/jupyterize/verify.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py index 0aa943548b..a0c531e38e 100644 --- a/build/jupyterize/verify.py +++ b/build/jupyterize/verify.py @@ -37,10 +37,11 @@ # real image. Only python is verified today; other languages must be passed via # --image until their binder-LANG-base digests are added here. BASE_IMAGES = { + # Current python base: redis-py 8.0.0 on Redis 8.2.2 (rebuilt 2026-06-19). 
"python": ( "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" "binder-python-base@sha256:" - "d28356e3f85b5d41c8324fcec7161b3a268a287b9025b590925829fda9aa71c1" + "bbb6b1f137115974f938f74acfcc50203565899343efe1dcfa5a72e48383f346" ), } From d4bd16f8d1cf95df976efa0201c0bfebe81488e3 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 10:18:34 +0100 Subject: [PATCH 08/17] DOC-6763 add Binder ID to Python vec set example --- .../vecset_tutorial/redis-py/dt_vec_set.py | 287 ++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 local_examples/vecset_tutorial/redis-py/dt_vec_set.py diff --git a/local_examples/vecset_tutorial/redis-py/dt_vec_set.py b/local_examples/vecset_tutorial/redis-py/dt_vec_set.py new file mode 100644 index 0000000000..f626ed6714 --- /dev/null +++ b/local_examples/vecset_tutorial/redis-py/dt_vec_set.py @@ -0,0 +1,287 @@ +# EXAMPLE: vecset_tutorial +# BINDER_ID python-dt-vector-set +# HIDE_START +""" +Code samples for Vector set doc pages: + https://redis.io/docs/latest/develop/data-types/vector-sets/ +""" + +import redis + +from redis.commands.vectorset.commands import ( + QuantizationOptions +) + +r = redis.Redis(decode_responses=True) +# HIDE_END + +# REMOVE_START +r.delete( + "points", "quantSetQ8", "quantSetNoQ", + "quantSetBin", "setNotReduced", "setReduced" +) +# REMOVE_END + +# STEP_START vadd +res1 = r.vset().vadd("points", [1.0, 1.0], "pt:A") +print(res1) # >>> 1 + +res2 = r.vset().vadd("points", [-1.0, -1.0], "pt:B") +print(res2) # >>> 1 + +res3 = r.vset().vadd("points", [-1.0, 1.0], "pt:C") +print(res3) # >>> 1 + +res4 = r.vset().vadd("points", [1.0, -1.0], "pt:D") +print(res4) # >>> 1 + +res5 = r.vset().vadd("points", [1.0, 0], "pt:E") +print(res5) # >>> 1 + +res6 = r.type("points") +print(res6) # >>> vectorset +# STEP_END +# REMOVE_START +assert res1 == 1 +assert res2 == 1 +assert res3 == 1 +assert res4 == 1 +assert res5 == 1 + +assert res6 == "vectorset" +# REMOVE_END + +# STEP_START vcardvdim 
+res7 = r.vset().vcard("points") +print(res7) # >>> 5 + +res8 = r.vset().vdim("points") +print(res8) # >>> 2 +# STEP_END +# REMOVE_START +assert res7 == 5 +assert res8 == 2 +# REMOVE_END + +# STEP_START vemb +res9 = r.vset().vemb("points", "pt:A") +print(res9) # >>> [0.9999999403953552, 0.9999999403953552] + +res10 = r.vset().vemb("points", "pt:B") +print(res10) # >>> [-0.9999999403953552, -0.9999999403953552] + +res11 = r.vset().vemb("points", "pt:C") +print(res11) # >>> [-0.9999999403953552, 0.9999999403953552] + +res12 = r.vset().vemb("points", "pt:D") +print(res12) # >>> [0.9999999403953552, -0.9999999403953552] + +res13 = r.vset().vemb("points", "pt:E") +print(res13) # >>> [1, 0] +# STEP_END +# REMOVE_START +assert 1 - res9[0] < 0.001 +assert 1 - res9[1] < 0.001 +assert 1 + res10[0] < 0.001 +assert 1 + res10[1] < 0.001 +assert 1 + res11[0] < 0.001 +assert 1 - res11[1] < 0.001 +assert 1 - res12[0] < 0.001 +assert 1 + res12[1] < 0.001 +assert res13 == [1, 0] +# REMOVE_END + +# STEP_START attr +res14 = r.vset().vsetattr("points", "pt:A", { + "name": "Point A", + "description": "First point added" +}) +print(res14) # >>> 1 + +res15 = r.vset().vgetattr("points", "pt:A") +print(res15) +# >>> {'name': 'Point A', 'description': 'First point added'} + +res16 = r.vset().vsetattr("points", "pt:A", "") +print(res16) # >>> 1 + +res17 = r.vset().vgetattr("points", "pt:A") +print(res17) # >>> None +# STEP_END +# REMOVE_START +assert res14 == 1 +assert res15 == {"name": "Point A", "description": "First point added"} +assert res16 == 1 +assert res17 is None +# REMOVE_END + +# STEP_START vrem +res18 = r.vset().vadd("points", [0, 0], "pt:F") +print(res18) # >>> 1 + +res19 = r.vset().vcard("points") +print(res19) # >>> 6 + +res20 = r.vset().vrem("points", "pt:F") +print(res20) # >>> 1 + +res21 = r.vset().vcard("points") +print(res21) # >>> 5 +# STEP_END +# REMOVE_START +assert res18 == 1 +assert res19 == 6 +assert res20 == 1 +assert res21 == 5 +# REMOVE_END + +# STEP_START 
vsim_basic +res22 = r.vset().vsim("points", [0.9, 0.1]) +print(res22) +# >>> ['pt:E', 'pt:A', 'pt:D', 'pt:C', 'pt:B'] +# STEP_END +# REMOVE_START +assert res22 == ["pt:E", "pt:A", "pt:D", "pt:C", "pt:B"] +# REMOVE_END + +# STEP_START vsim_options +res23 = r.vset().vsim( + "points", "pt:A", + with_scores=True, + count=4 +) +print(res23) +# >>> {'pt:A': 1.0, 'pt:E': 0.8535534143447876, 'pt:D': 0.5, 'pt:C': 0.5} +# STEP_END +# REMOVE_START +assert res23["pt:A"] == 1.0 +assert res23["pt:C"] == 0.5 +assert res23["pt:D"] == 0.5 +assert res23["pt:E"] - 0.85 < 0.005 +# REMOVE_END + +# STEP_START vsim_filter +res24 = r.vset().vsetattr("points", "pt:A", { + "size": "large", + "price": 18.99 +}) +print(res24) # >>> 1 + +res25 = r.vset().vsetattr("points", "pt:B", { + "size": "large", + "price": 35.99 +}) +print(res25) # >>> 1 + +res26 = r.vset().vsetattr("points", "pt:C", { + "size": "large", + "price": 25.99 +}) +print(res26) # >>> 1 + +res27 = r.vset().vsetattr("points", "pt:D", { + "size": "small", + "price": 21.00 +}) +print(res27) # >>> 1 + +res28 = r.vset().vsetattr("points", "pt:E", { + "size": "small", + "price": 17.75 +}) +print(res28) # >>> 1 + +# Return elements in order of distance from point A whose +# `size` attribute is `large`. +res29 = r.vset().vsim( + "points", "pt:A", + filter='.size == "large"' +) +print(res29) # >>> ['pt:A', 'pt:C', 'pt:B'] + +# Return elements in order of distance from point A whose size is +# `large` and whose price is greater than 20.00. 
+res30 = r.vset().vsim( + "points", "pt:A", + filter='.size == "large" && .price > 20.00' +) +print(res30) # >>> ['pt:C', 'pt:B'] +# STEP_END +# REMOVE_START +assert res24 == 1 +assert res25 == 1 +assert res26 == 1 +assert res27 == 1 +assert res28 == 1 + +assert res30 == ['pt:C', 'pt:B'] +# REMOVE_END + +# STEP_START add_quant +# Import `QuantizationOptions` enum using: +# +# from redis.commands.vectorset.commands import ( +# QuantizationOptions +# ) +res31 = r.vset().vadd( + "quantSetQ8", [1.262185, 1.958231], + "quantElement", + quantization=QuantizationOptions.Q8 +) +print(res31) # >>> 1 + +res32 = r.vset().vemb("quantSetQ8", "quantElement") +print(f"Q8: {res32}") +# >>> Q8: [1.2643694877624512, 1.958230972290039] + +res33 = r.vset().vadd( + "quantSetNoQ", [1.262185, 1.958231], + "quantElement", + quantization=QuantizationOptions.NOQUANT +) +print(res33) # >>> 1 + +res34 = r.vset().vemb("quantSetNoQ", "quantElement") +print(f"NOQUANT: {res34}") +# >>> NOQUANT: [1.262184977531433, 1.958230972290039] + +res35 = r.vset().vadd( + "quantSetBin", [1.262185, 1.958231], + "quantElement", + quantization=QuantizationOptions.BIN +) +print(res35) # >>> 1 + +res36 = r.vset().vemb("quantSetBin", "quantElement") +print(f"BIN: {res36}") +# >>> BIN: [1, 1] +# STEP_END +# REMOVE_START +assert res31 == 1 +# REMOVE_END + +# STEP_START add_reduce +# Create a list of 300 arbitrary values. 
+values = [x / 299 for x in range(300)] + +res37 = r.vset().vadd( + "setNotReduced", + values, + "element" +) +print(res37) # >>> 1 + +res38 = r.vset().vdim("setNotReduced") +print(res38) # >>> 300 + +res39 = r.vset().vadd( + "setReduced", + values, + "element", + reduce_dim=100 +) +print(res39) # >>> 1 + +res40 = r.vset().vdim("setReduced") # >>> 100 +print(res40) +# STEP_END From 7c049636ad9fa1d667236649f710305d38989f1b Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 11:52:10 +0100 Subject: [PATCH 09/17] DOC-6763 build sync scripts --- build/jupyterize/notebook_builder.py | 5 + build/jupyterize/sync_notebook.py | 294 +++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 build/jupyterize/sync_notebook.py diff --git a/build/jupyterize/notebook_builder.py b/build/jupyterize/notebook_builder.py index 97e8e03cf4..14d1f94377 100644 --- a/build/jupyterize/notebook_builder.py +++ b/build/jupyterize/notebook_builder.py @@ -144,6 +144,11 @@ def _create_notebook(self, cells): nb = new_notebook() nb.cells = cells + # Deterministic cell ids: nbformat assigns random ids otherwise, which + # would make every regeneration of an unchanged example produce a diff. + for i, cell in enumerate(nb.cells): + cell['id'] = f"cell{i}" + # Set kernel metadata kernel_spec = get_kernel_spec(self.language) diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py new file mode 100644 index 0000000000..25a96c1611 --- /dev/null +++ b/build/jupyterize/sync_notebook.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +""" +sync_notebook.py - generate notebook(s) from an example source and sync them to +the matching binder-launchers branch. + +Deterministic glue for the docs -> binder-launchers pipeline: + changed source file -> read BINDER_ID -> jupyterize ship + test notebooks + -> create/update the binder-launchers branch -> (optionally) push. 
+ +Verification happens downstream in the binder-launchers verify gate; this script +ALSO runs a local pre-check (verify.py) and refuses to commit a notebook whose +asserts don't pass, so a broken example never gets synced. + +For an EXISTING branch it updates demo.ipynb + demo.test.ipynb and upgrades the +workflow/.dockerignore to the verify-gated versions, but leaves the Dockerfile +(and its pinned base-image digest) untouched. For a NEW branch it scaffolds the +full set, pinning the base image from LANG_BASE_IMAGE. + +Usage: + python build/jupyterize/sync_notebook.py SOURCE [--repo PATH] + [--push] [--dry-run] [--no-verify] [--mode script|kernel] +""" + +import argparse +import os +import subprocess +import sys + +HERE = os.path.dirname(os.path.abspath(__file__)) +JUPYTERIZE = os.path.join(HERE, "jupyterize.py") +VERIFY = os.path.join(HERE, "verify.py") +# Default sibling clone: PARENT/binder-launchers next to PARENT/docs +DEFAULT_REPO = os.path.normpath(os.path.join(HERE, "..", "..", "..", "binder-launchers")) + +EXT_LANGUAGE = { + ".py": "python", ".js": "node.js", ".go": "go", ".java": "java", + ".cs": "c#", ".php": "php", ".rb": "ruby", ".rs": "rust", +} + +# Base image used only when SCAFFOLDING A NEW branch. Existing branches keep +# their own FROM line. Pin a digest here once confirmed for that language. +LANG_BASE_IMAGE = { + "python": ( + "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" + "binder-python-base@sha256:" + "bbb6b1f137115974f938f74acfcc50203565899343efe1dcfa5a72e48383f346" + ), +} + +DOCKERIGNORE = "Dockerfile\ngha-creds*\ndemo.test.ipynb\n" + +README = ( + "# Binder Launchers\n\n" + "This branch contains a Jupyter notebook environment that builds on a\n" + "pre-built Redis-enabled base image. 
The notebook is generated from the\n" + "matching example source in the redis/docs repo - do not edit it by hand;\n" + "regenerate it with build/jupyterize/sync_notebook.py.\n" +) + +# Verify-before-deploy workflow (gates the reusable build-and-deploy on a +# successful execution of demo.test.ipynb against the branch's base image). +WORKFLOW = """name: Build and deploy binder images + +on: + push: + branches-ignore: + - main + paths: + - 'Dockerfile' + - 'demo.ipynb' + - 'demo.test.ipynb' + - '.github/workflows/main.yml' + +jobs: + # Gate: execute the test notebook (which still contains the REMOVE-block + # asserts) inside the exact base image this branch ships on. Any cell error + # or failed assert fails the job, which blocks the build-and-deploy below. + # GitHub runners are amd64, so the Jupyter kernel runs natively (no emulation). + verify: + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - name: 'Checkout' + uses: 'actions/checkout@v4' + + - name: 'Google auth' + uses: 'google-github-actions/auth@v2' + with: + project_id: '${{ secrets.PROJECT_ID }}' + service_account: '${{ secrets.SERVICE_ACCOUNT }}' + workload_identity_provider: '${{ secrets.WORKLOAD_IDENTITY_PROVIDER }}' + + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v2' + with: + project_id: '${{ secrets.PROJECT_ID }}' + + - name: 'Execute test notebook against the base image' + run: |- + set -euo pipefail + gcloud auth configure-docker us-central1-docker.pkg.dev --quiet + BASE=$(awk '/^FROM/ {print $2; exit}' Dockerfile) + echo "Verifying demo.test.ipynb against base image: ${BASE}" + docker pull "${BASE}" + docker run --rm -v "${PWD}:/work" "${BASE}" bash -c ' + cd /usr/src/redis-src && ./redis-server ./redis.conf --daemonize yes >/dev/null 2>&1 && sleep 1 + cd /work && jupyter nbconvert --to notebook --execute \\ + --ExecutePreprocessor.startup_timeout=120 \\ + --ExecutePreprocessor.timeout=120 \\ + --output /tmp/executed.ipynb 
demo.test.ipynb' + + call-reusable-workflow: + needs: verify + uses: redis/binder-launchers/.github/workflows/build-and-deploy.yml@main + with: + branch_name: ${{ github.ref_name }} + secrets: inherit +""" + + +def fail(msg): + print(f"ERROR: {msg}", file=sys.stderr) + sys.exit(1) + + +def git(repo, *args, capture=True): + return subprocess.run( + ["git", "-C", repo, *args], + check=True, text=True, + capture_output=capture, + ) + + +def detect_language(path): + return EXT_LANGUAGE.get(os.path.splitext(path)[1].lower()) + + +def read_binder_id(path): + """Read the BINDER_ID marker (works for # and // comment prefixes).""" + with open(path, encoding="utf-8") as f: + for line in f: + s = line.strip() + for pre in ("#", "//"): + if s.startswith(f"{pre} BINDER_ID "): + return s.split("BINDER_ID", 1)[1].strip() + return None + + +def remote_branch_exists(repo, branch): + r = subprocess.run( + ["git", "-C", repo, "ls-remote", "--heads", "origin", branch], + capture_output=True, text=True, + ) + return bool(r.stdout.strip()) + + +def local_verify(source, mode, image): + """Run verify.py on the source against the branch's base image.""" + print(f"\n--- Local pre-check: verify.py --mode {mode} ---") + cmd = [sys.executable, VERIFY, source, "--mode", mode] + if image: + cmd += ["--image", image] + r = subprocess.run(cmd) + return r.returncode == 0 + + +def read_from(dockerfile): + """Return the image ref on the Dockerfile's FROM line.""" + with open(dockerfile, encoding="utf-8") as f: + for line in f: + if line.strip().startswith("FROM "): + return line.strip().split(None, 1)[1].strip() + return None + + +def jupyterize(source, out_path, with_tests=False): + cmd = [sys.executable, JUPYTERIZE, source, "-o", out_path] + if with_tests: + cmd.append("--with-tests") + r = subprocess.run(cmd, capture_output=True, text=True) + if r.returncode != 0: + fail("jupyterize failed (is nbformat installed in this env?):\n" + + (r.stderr or r.stdout)) + + +def write(path, content): + 
os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def main(): + ap = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + ap.add_argument("source", help="example source file under local_examples/") + ap.add_argument("--repo", default=DEFAULT_REPO, + help=f"binder-launchers clone (default: {DEFAULT_REPO})") + ap.add_argument("--push", action="store_true", + help="push the branch to origin after committing") + ap.add_argument("--dry-run", action="store_true", + help="generate + verify + write files, but do not commit/push") + ap.add_argument("--no-verify", action="store_true", + help="skip the local verify.py pre-check") + ap.add_argument("--mode", choices=["kernel", "script"], default="script", + help="verify mode for the local pre-check (default: script)") + args = ap.parse_args() + + source = os.path.abspath(args.source) + repo = os.path.abspath(args.repo) + if not os.path.isfile(source): + fail(f"source not found: {source}") + if not os.path.isdir(os.path.join(repo, ".git")): + fail(f"not a git repo: {repo}") + + language = detect_language(source) + branch = read_binder_id(source) + if not branch: + fail(f"no BINDER_ID marker in {source}; cannot determine target branch") + print(f"Source: {source}") + print(f"Language: {language} Target branch: {branch}") + + # Guard against clobbering work in the binder-launchers clone. 
+ status = git(repo, "status", "--porcelain").stdout.strip() + if status: + fail(f"binder-launchers working tree is dirty; commit/stash first:\n{status}") + + git(repo, "fetch", "--quiet", "origin") + is_new = not remote_branch_exists(repo, branch) + + if is_new: + print(f"Branch '{branch}' does not exist -> scaffolding a new one.") + base_image = LANG_BASE_IMAGE.get(language) + if not base_image: + fail(f"no base image known for language {language!r}; " + f"add it to LANG_BASE_IMAGE to scaffold new {language} branches") + git(repo, "switch", "--quiet", "-c", branch, "origin/main") + write(os.path.join(repo, "Dockerfile"), + f"FROM {base_image}\nADD demo.ipynb .\n") + write(os.path.join(repo, "README.md"), README) + else: + print(f"Branch '{branch}' exists -> updating (Dockerfile preserved).") + git(repo, "switch", "--quiet", branch) + git(repo, "reset", "--hard", "--quiet", f"origin/{branch}") + base_image = read_from(os.path.join(repo, "Dockerfile")) + if not base_image: + fail("could not read FROM line from the branch Dockerfile") + + # Local pre-check against the branch's ACTUAL base image: refuse to sync a + # notebook whose asserts don't pass on the image it will ship on. + if not args.no_verify: + if not local_verify(source, args.mode, base_image): + fail("local verification failed - not syncing") + print("--- Local pre-check PASSED ---") + + # Generate notebooks straight into the branch working tree. + jupyterize(source, os.path.join(repo, "demo.ipynb"), with_tests=False) + jupyterize(source, os.path.join(repo, "demo.test.ipynb"), with_tests=True) + # Always (re)apply the verify gate + ignore rules so existing plain branches + # get upgraded too. Dockerfile is left as-is for existing branches. 
+ write(os.path.join(repo, ".github", "workflows", "main.yml"), WORKFLOW) + write(os.path.join(repo, ".dockerignore"), DOCKERIGNORE) + + git(repo, "add", "-A") + diff = git(repo, "status", "--porcelain").stdout.strip() + if not diff: + print("\nNothing changed - branch already up to date.") + return 0 + print(f"\nChanges staged on '{branch}':\n{diff}") + + if args.dry_run: + print("\n[dry-run] not committing or pushing.") + return 0 + + msg = (f"Sync {os.path.basename(source)} notebook via jupyterize\n\n" + f"Generated demo.ipynb + demo.test.ipynb from the docs example " + f"source and {'scaffolded' if is_new else 'updated'} this branch.") + git(repo, "commit", "--quiet", "-m", msg) + print(f"Committed to '{branch}'.") + + if args.push: + git(repo, "push", "--quiet", + *(["-u", "origin", branch] if is_new else []), capture=False) + print(f"Pushed '{branch}' to origin.") + else: + print("Not pushed (use --push). Review the commit, then push when ready.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From cd0e1ec2fe68877a33114c2f92054f5fdc9fbb67 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 14:15:06 +0100 Subject: [PATCH 10/17] DOC-6763 improvements to jupyterize tool --- build/jupyterize/config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/build/jupyterize/config.py b/build/jupyterize/config.py index 4f35ae8333..a32f7cde6d 100644 --- a/build/jupyterize/config.py +++ b/build/jupyterize/config.py @@ -25,8 +25,10 @@ } }, 'node.js': { - 'name': 'javascript', - 'display_name': 'JavaScript (Node.js)', + # 'jslab' is the kernel the binder-nodejs-base image actually installs + # (from the tslab package); the older 'javascript' name is not present. 
+ 'name': 'jslab', + 'display_name': 'JavaScript', 'language': 'javascript', 'language_info': { 'name': 'javascript', From 8e9fcbe7003cbf99dedb010c24e6401ae0f29742 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 15:24:35 +0100 Subject: [PATCH 11/17] DOC-6763 improvements to jupyterize tool --- build/jupyterize/config.py | 3 +- build/jupyterize/js-notebook-findings.md | 149 +++++++++++++++++++++++ build/jupyterize/test_jupyterize.py | 72 +++++++++++ build/jupyterize/unwrapper.py | 10 ++ 4 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 build/jupyterize/js-notebook-findings.md diff --git a/build/jupyterize/config.py b/build/jupyterize/config.py index a32f7cde6d..8ac91a2cde 100644 --- a/build/jupyterize/config.py +++ b/build/jupyterize/config.py @@ -38,7 +38,8 @@ } }, 'go': { - 'name': 'gophernotes', + # binder-go-base installs GoNB (kernel name 'gonb'), not gophernotes. + 'name': 'gonb', 'display_name': 'Go', 'language': 'go', 'language_info': { diff --git a/build/jupyterize/js-notebook-findings.md b/build/jupyterize/js-notebook-findings.md new file mode 100644 index 0000000000..a545c8851c --- /dev/null +++ b/build/jupyterize/js-notebook-findings.md @@ -0,0 +1,149 @@ +# JavaScript (node-redis) notebook findings + +Investigation date: 2026-06-19. Context: extending the jupyterize → verify → +binder-launchers pipeline beyond Python, starting with node-redis (the +time-series tutorial, `dt-time-series.js` from the node-redis doctests). + +## TL;DR + +- **jupyterize itself handles JS fine.** node-redis examples are flat + top-level-`await` scripts with `//` markers — no unwrapping needed, just like + Python. Generation (ship + test notebooks, asserts retained) works. +- **One real jupyterize bug fixed:** it emitted kernel name `javascript`, but + the `binder-nodejs-base` image has no such kernel — its JS kernel is `jslab` + (from the `tslab` package). Changed `config.py` node.js → `jslab`. 
(Every + existing `nodejs-*` notebook declares the non-existent `javascript`, so they + are mis-kernel'd for automated execution too.) +- **The blocker is the kernel (`tslab`/`jslab`), not jupyterize.** It is a poor + fit for the automated nbconvert assert-gate, for two compounding reasons + (below). Recommended path: verify non-Python examples via their **native + harness** (`node script.js`), and treat notebook-kernel execution as a lighter + "does it run/display in Binder" check. + +## Environment facts (binder-nodejs-base @sha256:8c3563d8…) + +- JS kernels installed: `jslab` and `tslab` (both from the `tslab` npm package). + `jslab` = `tslab kernel --js`. There is **no** `javascript` or `ijavascript` + kernel. +- `jslab` **does** start and execute under amd64 emulation on Apple Silicon + (unlike Python's ipykernel, which hangs under qemu). So local JS testing is + possible — but see the reliability caveats below. +- node-redis is **v5.12.1** (ESM), installed at `/home/jovyan/node_modules` with + empty `NODE_PATH`. Node only resolves `redis` when the notebook runs from + `/home/jovyan` (which Binder does via `ADD demo.ipynb .` → WORKDIR). A verify + step that runs from elsewhere must `cd /home/jovyan` or set `NODE_PATH`. + +## Blocker 1 — tslab hardcodes type-checking + +`tslab` runs the TypeScript compiler over each JS cell and sets +`checkJs: true` **in code** (`converter.js:~221`), passed directly to the +compiler — it is **not** merged with any user `tsconfig.json`. Execution is +gated on pre-emit diagnostics (`converter.js:272`). + +Consequence: node-redis v5's heavily-generic return types don't survive tslab's +**cross-cell `.d.ts` declaration emission** (each cell's vars are emitted to a +declaration file the next cell imports). They collapse to `string`, so e.g. +`info.totalSamples` / `res.sourceKey` fail with *"Property does not exist on +type 'string'"* and the cell never runs. 
+ +Things that do **not** fix it: +- A `tsconfig.json` with `checkJs:false` — ignored (hardcoded value wins). +- `// @ts-nocheck` per cell — the bad type lives in the emitted dependency + `.d.ts`, not the annotated cell. + +What does get past it: patching the vendored file in the image +(`sed -i 's/checkJs: true/checkJs: false/' …/tslab/dist/converter.js`, needs +root at build time). Cells then execute. But that exposes Blocker 2. + +## Blocker 2 — tslab's error reporting through nbconvert is unreliable + +With `checkJs:false`, runtime behaviour through nbconvert is inconsistent: +- A **standalone failing assert** correctly raises `CellExecutionError` + (nbconvert exits non-zero) — so the gate *can* catch errors. +- But the **correct** full notebook *also* fails without `--allow-errors` + (some cell returns an error-status reply), while *with* `--allow-errors` it + shows **no error outputs at all** and step cells emit **no stdout**. + +So there is no clean "good → pass / broken → fail" signal from tslab+nbconvert. +This is the real reason node verification via the notebook kernel isn't viable +as-is. It is a tslab limitation; Python's ipykernel (the reference kernel) +reports errors and outputs cleanly, which is why Python "just worked". + +## Recommendation for non-Python verification + +Verify the example in its **native test harness** rather than through the +notebook kernel. The source files *are* the client repos' doctests, designed to +run as `node script.js` / `go test` / etc., where asserts gate via process exit. +Split the two concerns: + +1. **Correctness gate** = native runner (reliable assert gating). +2. **Notebook check** = "executes/displays in the Binder kernel" (lighter; for + JS still needs the `checkJs:false` image patch so tslab doesn't reject valid + JS). + +jupyterize (generation) is unaffected and remains the deterministic core. + +## Cross-client probe (2026-06-19): which kernels gate? 
+ +Ran a two-question probe (does jupyterize's kernel name match the image; does a +deliberately-failing cell gate through nbconvert) against all four base images. + +| Client | Kernel (image) | jupyterize name | Name OK? | Good cell runs? | Failing cell gates? | +|--------|----------------|-----------------|----------|-----------------|---------------------| +| Python (redis-py) | ipykernel `python3` | `python3` | ✓ | ✓ | ✓ exit 1, `error` output | +| Java (Jedis) | IJava `java` | `java` | ✓ | ✓ (prints 42) | ✓ exit 1, `EvalException` | +| C# (NRedisStack) | .NET Interactive `.net-csharp` | `.net-csharp` | ✓ | ✓ (prints 42) | ✓ exit 1, `Error` | +| Go (go-redis) | GoNB `gonb` | ~~`gophernotes`~~ → fixed to `gonb` | was ✗, now ✓ | ✓ (prints 42) | ✗ panic → stream, **exit 0** | +| Node (node-redis) | tslab `jslab` | ~~`javascript`~~ → fixed to `jslab` | was ✗, now ✓ | ✓ | ✗ error → stream, **exit 0** | + +**Pattern:** in-process kernels (IPython, JShell/IJava, .NET Interactive) raise +proper Jupyter `error` messages, so the nbconvert assert-gate works. Kernels +that compile-and-run a subprocess (tslab→node, GoNB→go) capture the subprocess +stderr as a *stream* and don't propagate failure status — so the gate is hollow. + +**Implications:** +- **Java & C#**: the notebook-kernel verify gate works, same as Python. Remaining + risk is jupyterize's regex *unwrapper* (these examples are wrapped in + class/method scaffolding, unlike the flat Python/Node scripts) — a + generation-correctness question, not a kernel one. +- **Go & Node**: notebook-kernel gating does not work. Verify via the native + harness (`go test`, `node script.js`) instead; treat notebook execution as a + lighter "displays/runs in Binder" check (Node also needs the tslab + `checkJs:false` image patch). +- Kernel-name fixes applied in `config.py`: node.js → `jslab`, go → `gonb`. 
+ +## Java / Jedis end-to-end attempt (2026-06-19) + +Ran the time-series example (`TimeSeriesTutorialExample.java` from the jedis +doctests) through the full workflow. Two findings: + +1. **The unwrapper works well.** From a `public class { @Test public void run() + { … } }` wrapper, jupyterize correctly stripped the class / `@Test` / method / + `package` lines and the junit asserts, and hoisted the real imports + (`RedisClient`, `timeseries.*`, `java.util.*`) to the top — producing clean + flat JShell statements matching the existing `jedis-dt-list` notebook shape. +2. **Two real issues:** + - **Blocker — jedis version lag.** binder-java-base ships **jedis 5.1.0** + (has `UnifiedJedis`, NOT `RedisClient`). The example uses `RedisClient` + (jedis 6.x), so cell 0's `import redis.clients.jedis.RedisClient` fails with + "cannot find symbol" and cascades to all cells. Needs a base-image jedis + bump to 6.x — same shape as the Python AR*/redis-py version lags. + - **Unwrapper bug — trailing close braces.** The wrapper's closing `}` (method) + and `}` (class) are in the LAST cell, but jupyterize unwraps each cell + independently and the opening `{`s are in cell 0 — so its brace-balancing + can't pair them, and the final cell keeps `}\n}`. This breaks the last + cell's compile even after a jedis bump. Affects all wrapped languages + (Java/C#/Go). Go's config has a `closing_braces` pattern that strips + orphan `}` lines; Java/C# need the same (or a global trailing-brace pass). + +Net: Jedis is pipeline-ready *pending* (a) a base-image jedis 6.x bump and (b) +the trailing-brace unwrapper fix. The hard parts — unwrapping and IJava error +gating — are sound. Branch NOT created (would be red on both counts). + +## Open questions + +- Why was `ijavascript` rejected? If those reasons don't extend to a **Deno** + Jupyter kernel, Deno runs JS/TS without tslab's checking quirks and may be a + cleaner kernel choice. 
+- Whether to patch `checkJs:false` into `binder-nodejs-base` regardless, since + notebooks won't even *display*/run in the kernel without it. diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index 3e3eaff92b..63d078be8f 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -867,6 +867,7 @@ def main(): test_basic_conversion() test_hide_remove_blocks() test_keep_tests_mode() + test_trailing_brace_orphans() test_javascript_file() # Edge case tests @@ -1248,6 +1249,77 @@ def test_keep_tests_mode(): os.unlink(p) +def test_trailing_brace_orphans(): + """Orphan wrapper close-braces (in a later cell) are stripped, but balanced + block braces in the same example are preserved.""" + print("\nTesting orphan trailing-brace removal across cells...") + + # The class/method wrapper opens in the first cell; a teardown statement and + # the wrapper's closing braces land in a trailing context cell (not a + # braces-only cell, so it isn't skipped). A balanced for-loop sits in a step. + test_content = """// EXAMPLE: test_trailing_braces +import redis.clients.jedis.UnifiedJedis; + +public class TrailingBraceExample { + public void run() { + UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379"); + + // STEP_START loop + for (int i = 0; i < 2; i++) { + System.out.println(i); + } + // STEP_END + + jedis.close(); + } +} +""" + + with tempfile.NamedTemporaryFile(mode='w', suffix='.java', delete=False) as f: + f.write(test_content) + test_file = f.name + + try: + output_file = test_file.replace('.java', '.ipynb') + jupyterize(test_file, output_file, verbose=False) + + with open(output_file) as f: + nb = json.load(f) + + # No wrapper scaffolding survives anywhere. + all_src = '\n'.join(''.join(c['source']) for c in nb['cells']) + assert 'public class' not in all_src + assert 'public void run' not in all_src + + # The balanced for-loop keeps its own closing brace. 
+ loop_cell = next(c for c in nb['cells'] + if 'for (int i' in ''.join(c['source'])) + loop_src = ''.join(loop_cell['source']) + assert loop_src.count('{') == loop_src.count('}'), \ + f"balanced loop braces altered: {loop_src!r}" + + # The teardown cell keeps jedis.close() but loses the orphan wrapper '}'. + close_cell = next(c for c in nb['cells'] + if 'jedis.close()' in ''.join(c['source'])) + close_src = ''.join(close_cell['source']).rstrip() + assert close_src.endswith('jedis.close();'), \ + f"orphan braces not stripped: {close_src!r}" + + # No kept cell is left brace-positive (orphan trailing closes). + for c in nb['cells']: + src = ''.join(c['source']) + assert src.count('}') <= src.count('{'), \ + f"cell has orphan closing braces: {src!r}" + + print("✓ Orphan trailing-brace removal test passed") + + finally: + if os.path.exists(test_file): + os.unlink(test_file) + if os.path.exists(output_file): + os.unlink(output_file) + + if __name__ == '__main__': sys.exit(main()) diff --git a/build/jupyterize/unwrapper.py b/build/jupyterize/unwrapper.py index e58a3bb4ad..cb9e7b5f2c 100644 --- a/build/jupyterize/unwrapper.py +++ b/build/jupyterize/unwrapper.py @@ -223,5 +223,15 @@ def unwrap(self, code): logging.debug(f"Removing {braces_removed} trailing closing braces") code = _remove_trailing_braces(code, braces_removed) + # Strip any remaining orphan trailing closing braces. A class/method + # wrapper spans cells (opening braces in the first cell, closing braces + # in the last), so per-cell removal above leaves the trailing closes + # behind. Bound the strip to this cell's net brace imbalance so balanced + # bodies (for/foreach/lambda blocks) keep their own closing braces. 
+ net_orphans = code.count('}') - code.count('{') + if net_orphans > 0: + logging.debug(f"Removing {net_orphans} orphan trailing closing braces") + code = _remove_trailing_braces(code, net_orphans) + return code From d35d6bfc089409cdfbac5bc2869b92d13ef2ce6e Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 15:41:16 +0100 Subject: [PATCH 12/17] DOC-6763 Bugbot fixes --- build/jupyterize/sync_notebook.py | 42 ++++++++++++++----- build/jupyterize/verify.py | 32 +++++++++++++- .../vecset_tutorial/redis-py/dt_vec_set.py | 1 + 3 files changed, 62 insertions(+), 13 deletions(-) diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py index 25a96c1611..11f18097e5 100644 --- a/build/jupyterize/sync_notebook.py +++ b/build/jupyterize/sync_notebook.py @@ -156,16 +156,28 @@ def remote_branch_exists(repo, branch): return bool(r.stdout.strip()) -def local_verify(source, mode, image): - """Run verify.py on the source against the branch's base image.""" - print(f"\n--- Local pre-check: verify.py --mode {mode} ---") - cmd = [sys.executable, VERIFY, source, "--mode", mode] +def local_verify(notebook, mode, image): + """Execute the generated notebook in the base image; True if it passes.""" + print(f"\n--- Local pre-check: verify.py --notebook --mode {mode} ---") + cmd = [sys.executable, VERIFY, "--notebook", notebook, "--mode", mode] if image: cmd += ["--image", image] r = subprocess.run(cmd) return r.returncode == 0 +def restore_clone(repo, orig_branch, target, is_new): + """Return the binder-launchers clone to its pre-run clean state (used on + non-committing exits so a dry-run/failed sync doesn't block the next run).""" + git(repo, "reset", "--hard", "--quiet") + subprocess.run(["git", "-C", repo, "clean", "-fdq"], check=False) + if orig_branch and orig_branch != target: + git(repo, "switch", "--quiet", orig_branch) + if is_new: + subprocess.run(["git", "-C", repo, "branch", "-D", target], + capture_output=True) + + def
read_from(dockerfile): """Return the image ref on the Dockerfile's FROM line.""" with open(dockerfile, encoding="utf-8") as f: @@ -228,6 +240,8 @@ def main(): if status: fail(f"binder-launchers working tree is dirty; commit/stash first:\n{status}") + # Remember where the clone started so non-committing exits can restore it. + orig_branch = git(repo, "rev-parse", "--abbrev-ref", "HEAD").stdout.strip() git(repo, "fetch", "--quiet", "origin") is_new = not remote_branch_exists(repo, branch) @@ -249,16 +263,20 @@ def main(): if not base_image: fail("could not read FROM line from the branch Dockerfile") - # Local pre-check against the branch's ACTUAL base image: refuse to sync a - # notebook whose asserts don't pass on the image it will ship on. + # Generate notebooks straight into the branch working tree. + test_nb_path = os.path.join(repo, "demo.test.ipynb") + jupyterize(source, os.path.join(repo, "demo.ipynb"), with_tests=False) + jupyterize(source, test_nb_path, with_tests=True) + + # Local pre-check against the branch's ACTUAL base image, run on the + # GENERATED test notebook (not a re-parse), so the exact artifact that ships + # is what gets verified. Refuse to sync if its asserts don't pass. if not args.no_verify: - if not local_verify(source, args.mode, base_image): + if not local_verify(test_nb_path, args.mode, base_image): + restore_clone(repo, orig_branch, branch, is_new) fail("local verification failed - not syncing") print("--- Local pre-check PASSED ---") - # Generate notebooks straight into the branch working tree. - jupyterize(source, os.path.join(repo, "demo.ipynb"), with_tests=False) - jupyterize(source, os.path.join(repo, "demo.test.ipynb"), with_tests=True) # Always (re)apply the verify gate + ignore rules so existing plain branches # get upgraded too. Dockerfile is left as-is for existing branches. 
write(os.path.join(repo, ".github", "workflows", "main.yml"), WORKFLOW) @@ -268,11 +286,13 @@ def main(): diff = git(repo, "status", "--porcelain").stdout.strip() if not diff: print("\nNothing changed - branch already up to date.") + restore_clone(repo, orig_branch, branch, is_new) return 0 print(f"\nChanges staged on '{branch}':\n{diff}") if args.dry_run: - print("\n[dry-run] not committing or pushing.") + restore_clone(repo, orig_branch, branch, is_new) + print("\n[dry-run] not committing or pushing; clone restored.") return 0 msg = (f"Sync {os.path.basename(source)} notebook via jupyterize\n\n" diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py index a0c531e38e..d175709b49 100644 --- a/build/jupyterize/verify.py +++ b/build/jupyterize/verify.py @@ -114,7 +114,14 @@ def flush_ctx(): if _is(line, EXAMPLE) or _is(line, BINDER_ID) or _is(line, KERNEL_NAME): continue if _is(line, REMOVE_START): - flush_ctx() + # Flush an open step first so the REMOVE test cell lands *after* the + # step code that defines the variables its asserts reference. + if in_step and any(ln.strip() for ln in step_buf): + cells.append({"source": "".join(step_buf).strip("\n"), + "step": step_name, "test": False}) + step_buf = [] + else: + flush_ctx() in_remove, rem_buf = True, [] continue if _is(line, REMOVE_END): @@ -259,7 +266,11 @@ def report(executed): def main(): ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - ap.add_argument("source", help="example source .py") + ap.add_argument("source", nargs="?", help="example source .py") + ap.add_argument("--notebook", metavar="PATH", + help="verify a prebuilt .ipynb directly (e.g. 
jupyterize's " + "output) instead of re-parsing a source file; needs " + "--image") ap.add_argument("--image", default=None, help="base image (defaults to the source language's image)") ap.add_argument("--mode", choices=["kernel", "script"], default="kernel", @@ -269,6 +280,23 @@ def main(): help="also write the stripped (shipped) notebook here") args = ap.parse_args() + # Notebook mode: execute a prebuilt notebook as-is (no re-parsing), so the + # exact artifact that ships is what gets verified. + if args.notebook: + if not args.image: + raise SystemExit("--image is required with --notebook") + with open(args.notebook, encoding="utf-8") as f: + test_nb = json.load(f) + img_name = args.image.split('@')[0].split('/')[-1] + print(f"Verifying {args.notebook} in {img_name} (mode={args.mode}) ...") + executed = execute_in_image(test_nb, args.image, args.mode) + ok, _ = report(executed) + print() + print("RESULT: PASS" if ok else "RESULT: FAIL") + return 0 if ok else 1 + + if not args.source: + raise SystemExit("provide a source file, or --notebook PATH") image = resolve_image(args.source, args.image) markers = read_markers(args.source) cells = parse_cells(args.source) diff --git a/local_examples/vecset_tutorial/redis-py/dt_vec_set.py b/local_examples/vecset_tutorial/redis-py/dt_vec_set.py index f626ed6714..c40227b295 100644 --- a/local_examples/vecset_tutorial/redis-py/dt_vec_set.py +++ b/local_examples/vecset_tutorial/redis-py/dt_vec_set.py @@ -214,6 +214,7 @@ assert res27 == 1 assert res28 == 1 +assert res29 == ['pt:A', 'pt:C', 'pt:B'] assert res30 == ['pt:C', 'pt:B'] # REMOVE_END From 3381dfbb047524df378a062f2673dfd294d24e61 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 16:08:14 +0100 Subject: [PATCH 13/17] DOC-6763 more from the Bugbot --- build/jupyterize/parser.py | 23 +++++++++------ build/jupyterize/sync_notebook.py | 47 +++++++++++++++++++++++++++---- build/jupyterize/unwrapper.py | 35 +++++++++++++++++++---- 3 files changed, 85 
insertions(+), 20 deletions(-) diff --git a/build/jupyterize/parser.py b/build/jupyterize/parser.py index 61676c770a..9baa12254c 100644 --- a/build/jupyterize/parser.py +++ b/build/jupyterize/parser.py @@ -68,7 +68,7 @@ def parse(self, file_path): lines = f.readlines() # State tracking - in_remove = False + remove_depth = 0 in_step = False step_name = None step_lines = [] @@ -89,10 +89,14 @@ def parse(self, file_path): logging.debug(f"Line {line_num}: Skipping BINDER_ID marker") continue - # Handle REMOVE blocks + # Handle REMOVE blocks. Nested markers are absorbed into the + # outer block (track depth) so a nested REMOVE_START doesn't discard + # the lines collected for the outer block. if _check_marker(line, self.prefix, REMOVE_START): - if in_remove: + if remove_depth > 0: logging.warning(f"Line {line_num}: Nested REMOVE_START detected") + remove_depth += 1 + continue if self.keep_tests: # Flush pending code first so the test cell lands *after* # the code it checks (asserts reference its variables). 
@@ -105,22 +109,25 @@ def parse(self, file_path): 'step_name': None, 'is_test': False}) preamble_lines = [] remove_lines = [] - in_remove = True + remove_depth = 1 logging.debug(f"Line {line_num}: Entering REMOVE block") continue if _check_marker(line, self.prefix, REMOVE_END): - if not in_remove: + if remove_depth == 0: logging.warning(f"Line {line_num}: REMOVE_END without REMOVE_START") + continue + remove_depth -= 1 + if remove_depth > 0: + continue # closing a nested block; keep collecting if self.keep_tests and remove_lines: cells.append({'code': ''.join(remove_lines), 'step_name': None, 'is_test': True}) remove_lines = [] - in_remove = False logging.debug(f"Line {line_num}: Exiting REMOVE block") continue - if in_remove: + if remove_depth > 0: if self.keep_tests: remove_lines.append(line) continue @@ -194,7 +201,7 @@ def parse(self, file_path): logging.debug(f"Saved final preamble cell ({len(preamble_lines)} lines)") # Check for unclosed blocks - if in_remove: + if remove_depth > 0: logging.warning("File ended with unclosed REMOVE block") if in_step: logging.warning("File ended with unclosed STEP block") diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py index 11f18097e5..ffccbd8bd7 100644 --- a/build/jupyterize/sync_notebook.py +++ b/build/jupyterize/sync_notebook.py @@ -47,6 +47,17 @@ ), } +# Languages whose Jupyter kernel surfaces runtime errors as proper Jupyter +# errors, so the notebook verify gate (and the local pre-check) actually catch +# failing asserts. Compile-and-subprocess kernels (Go gonb, Node jslab) report +# runtime errors as stream output and can exit 0 - the gate only catches their +# compile/import errors, so verify those clients via a native harness. +GATING_LANGUAGES = {"python", "java", "c#"} + +# verify.py's kernel-less --mode script driver executes Python; it is only valid +# for Python notebooks. Other languages must use --mode kernel. 
+SCRIPT_MODE_LANGUAGES = {"python"} + DOCKERIGNORE = "Dockerfile\ngha-creds*\ndemo.test.ipynb\n" README = ( @@ -73,9 +84,15 @@ jobs: # Gate: execute the test notebook (which still contains the REMOVE-block - # asserts) inside the exact base image this branch ships on. Any cell error - # or failed assert fails the job, which blocks the build-and-deploy below. - # GitHub runners are amd64, so the Jupyter kernel runs natively (no emulation). + # asserts) inside the exact base image this branch ships on, before deploy. + # GitHub runners are amd64, so the Jupyter kernel runs natively. + # + # NOTE: this reliably gates failing asserts only for IN-PROCESS kernels + # (Python/Java/C#), which surface errors as Jupyter error messages. Compile- + # and-subprocess kernels (Go `gonb`, Node `jslab`) report runtime errors as + # stream output and can still exit 0, so for those this catches compile/import + # errors but NOT runtime assert failures - verify those with a native harness + # (`go test`, `node script.js`). See build/jupyterize/js-notebook-findings.md. verify: runs-on: ubuntu-latest permissions: @@ -217,8 +234,11 @@ def main(): help="generate + verify + write files, but do not commit/push") ap.add_argument("--no-verify", action="store_true", help="skip the local verify.py pre-check") - ap.add_argument("--mode", choices=["kernel", "script"], default="script", - help="verify mode for the local pre-check (default: script)") + ap.add_argument("--mode", choices=["kernel", "script", "auto"], + default="auto", + help="verify mode for the local pre-check. 'auto' (default) " + "picks script for Python, kernel for other languages " + "(the script driver only runs Python).") args = ap.parse_args() source = os.path.abspath(args.source) @@ -235,6 +255,21 @@ def main(): print(f"Source: {source}") print(f"Language: {language} Target branch: {branch}") + # Resolve the pre-check mode: the kernel-less script driver only runs Python. 
+ mode = args.mode + if mode == "auto": + mode = "script" if language in SCRIPT_MODE_LANGUAGES else "kernel" + elif mode == "script" and language not in SCRIPT_MODE_LANGUAGES: + fail(f"--mode script only works for Python; {language!r} needs " + f"--mode kernel (the script driver executes Python).") + + # Warn when the kernel can't gate runtime errors (asserts won't fail CI). + if language not in GATING_LANGUAGES: + print(f"WARNING: {language}'s kernel reports runtime errors as stream " + f"output, so neither this pre-check nor the CI gate catches " + f"failing asserts (only compile/import errors). Verify {language} " + f"examples with a native harness.") + # Guard against clobbering work in the binder-launchers clone. status = git(repo, "status", "--porcelain").stdout.strip() if status: @@ -272,7 +307,7 @@ def main(): # GENERATED test notebook (not a re-parse), so the exact artifact that ships # is what gets verified. Refuse to sync if its asserts don't pass. if not args.no_verify: - if not local_verify(test_nb_path, args.mode, base_image): + if not local_verify(test_nb_path, mode, base_image): restore_clone(repo, orig_branch, branch, is_new) fail("local verification failed - not syncing") print("--- Local pre-check PASSED ---") diff --git a/build/jupyterize/unwrapper.py b/build/jupyterize/unwrapper.py index cb9e7b5f2c..1c79c5e508 100644 --- a/build/jupyterize/unwrapper.py +++ b/build/jupyterize/unwrapper.py @@ -141,6 +141,32 @@ def _remove_trailing_braces(code, count): return '\n'.join(result) +def _strip_trailing_orphan_braces(code): + """ + Strip orphan closing braces left when a class/method wrapper's opening was + removed from an earlier cell. + + Only CONTIGUOUS trailing lone-'}' lines are removed (stopping at the first + real content line), and at most as many as the cell has unmatched closes + (`}` minus `{`). This preserves the closing braces of balanced blocks + (for/foreach/lambda bodies) that legitimately sit inside the cell. 
+ """ + net = code.count('}') - code.count('{') + if net <= 0: + return code + + lines = code.split('\n') + while net > 0 and lines: + if lines[-1].strip() == '': + lines.pop() # drop trailing blank lines + elif re.match(r'^\s*\}\s*$', lines[-1]): + lines.pop() # drop an orphan closing brace + net -= 1 + else: + break # hit real content; stop + return '\n'.join(lines) + + class CodeUnwrapper: """Removes language-specific structural wrappers from code.""" @@ -226,12 +252,9 @@ def unwrap(self, code): # Strip any remaining orphan trailing closing braces. A class/method # wrapper spans cells (opening braces in the first cell, closing braces # in the last), so per-cell removal above leaves the trailing closes - # behind. Bound the strip to this cell's net brace imbalance so balanced - # bodies (for/foreach/lambda blocks) keep their own closing braces. - net_orphans = code.count('}') - code.count('{') - if net_orphans > 0: - logging.debug(f"Removing {net_orphans} orphan trailing closing braces") - code = _remove_trailing_braces(code, net_orphans) + # behind. Only contiguous trailing lone-'}' lines are removed, bounded by + # the cell's net brace imbalance, so balanced bodies keep their braces. 
+ code = _strip_trailing_orphan_braces(code) return code From a93ee9557b4d018bf00f02ac29b00b4021eb5548 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 16:18:16 +0100 Subject: [PATCH 14/17] DOC-6763 more from the Bugbot --- build/jupyterize/sync_notebook.py | 37 +++++++++++++++++-- .../redis-py/dt_time_series.py | 3 +- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py index ffccbd8bd7..7d1c2f3d0f 100644 --- a/build/jupyterize/sync_notebook.py +++ b/build/jupyterize/sync_notebook.py @@ -173,6 +173,25 @@ def remote_branch_exists(repo, branch): return bool(r.stdout.strip()) +def local_branch_exists(repo, branch): + r = subprocess.run( + ["git", "-C", repo, "rev-parse", "--verify", "--quiet", + f"refs/heads/{branch}"], + capture_output=True, text=True, + ) + return r.returncode == 0 + + +def commits_ahead(repo, branch): + """How many commits local `branch` has that origin/`branch` does not.""" + r = subprocess.run( + ["git", "-C", repo, "rev-list", "--count", + f"origin/{branch}..{branch}"], + capture_output=True, text=True, + ) + return int(r.stdout.strip() or 0) if r.returncode == 0 else 0 + + def local_verify(notebook, mode, image): """Execute the generated notebook in the base image; True if it passes.""" print(f"\n--- Local pre-check: verify.py --notebook --mode {mode} ---") @@ -278,7 +297,9 @@ def main(): # Remember where the clone started so non-committing exits can restore it. 
orig_branch = git(repo, "rev-parse", "--abbrev-ref", "HEAD").stdout.strip() git(repo, "fetch", "--quiet", "origin") - is_new = not remote_branch_exists(repo, branch) + on_origin = remote_branch_exists(repo, branch) + on_local = local_branch_exists(repo, branch) + is_new = not (on_origin or on_local) if is_new: print(f"Branch '{branch}' does not exist -> scaffolding a new one.") @@ -292,8 +313,18 @@ def main(): write(os.path.join(repo, "README.md"), README) else: print(f"Branch '{branch}' exists -> updating (Dockerfile preserved).") - git(repo, "switch", "--quiet", branch) - git(repo, "reset", "--hard", "--quiet", f"origin/{branch}") + # Check out the branch (creating a local ref from origin if needed). + if on_local: + git(repo, "switch", "--quiet", branch) + else: + git(repo, "switch", "--quiet", "-c", branch, f"origin/{branch}") + # Re-sync to origin, but never silently drop local commits not on origin. + if on_origin: + ahead = commits_ahead(repo, branch) + if ahead: + fail(f"branch '{branch}' has {ahead} local commit(s) not on " + f"origin; push or discard them before syncing") + git(repo, "reset", "--hard", "--quiet", f"origin/{branch}") base_image = read_from(os.path.join(repo, "Dockerfile")) if not base_image: fail("could not read FROM line from the branch Dockerfile") diff --git a/local_examples/time_series_tutorial/redis-py/dt_time_series.py b/local_examples/time_series_tutorial/redis-py/dt_time_series.py index 2f52eca166..4eed2a19b3 100644 --- a/local_examples/time_series_tutorial/redis-py/dt_time_series.py +++ b/local_examples/time_series_tutorial/redis-py/dt_time_series.py @@ -227,7 +227,8 @@ # Retrieve the same data points, but include the `unit` # label in the results. 
res29 = r.ts().mget(["location=us"], select_labels=["unit"]) -print(res29) # >>> [{'unit': 'cm'}, (4, 1.78), {'unit': 'in'}, (4, 0.74)] +print(res29) +# >>> [{'rg:2': [{'unit': 'cm'}, 4, 1.78]}, {'rg:3': [{'unit': 'in'}, 4, 0.74]}] # Retrieve data points up to time 2 (inclusive) from all # time series that use millimeters as the unit. Include all From d0f8e203b19944fdd8d1ecba599f428f8814feb0 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 16:32:39 +0100 Subject: [PATCH 15/17] DOC-6763 more Bugbot stuff --- build/jupyterize/verify.py | 254 +++++-------------------------------- 1 file changed, 31 insertions(+), 223 deletions(-) diff --git a/build/jupyterize/verify.py b/build/jupyterize/verify.py index d175709b49..bd18aad729 100644 --- a/build/jupyterize/verify.py +++ b/build/jupyterize/verify.py @@ -1,23 +1,22 @@ #!/usr/bin/env python3 """ -verify.py - PROTOTYPE notebook verification harness (Python only). +verify.py - notebook verification harness. -Takes a Redis docs code-example source file, builds a *test notebook* in which -the REMOVE_START/END blocks (containing the real asserts) are kept as tagged -cells, executes that notebook inside the real BinderHub base image against the -bundled Redis, and reports pass/fail. The asserts are the oracle. +Executes a prebuilt Jupyter notebook (e.g. jupyterize's --with-tests output, in +which the REMOVE-block asserts are kept as 'test'-tagged cells) inside the real +BinderHub base image against the bundled Redis, and reports pass/fail. The +asserts are the oracle. -It can also emit the *shipped* notebook (test cells stripped) so you can see -exactly what would land in binder-launchers. +verify.py does NOT parse source files - jupyterize is the single source of truth +for source -> notebook. Generate the notebook with jupyterize, then verify it +here, so what gets verified is exactly what ships. -Host requirements: Docker + python3 stdlib only (no nbformat needed locally). 
-The base image supplies the kernel, redis-py, and redis-server. +Host requirements: Docker + python3 stdlib only. The base image supplies the +kernel, redis-py, and redis-server. Usage: - python build/jupyterize/verify.py <source.py> [--ship out/demo.ipynb] [--keep] - -This is a prototype to validate the loop end-to-end; the parsing logic would -later fold into jupyterize proper as a "test mode". + python build/jupyterize/verify.py --notebook demo.test.ipynb --image <image> + [--mode kernel|script] """ import argparse @@ -27,160 +26,6 @@ import sys import tempfile -# Source file extension -> language. Drives base-image selection. -EXT_LANGUAGE = { - ".py": "python", ".js": "node.js", ".go": "go", - ".java": "java", ".cs": "c#", ".php": "php", ".rb": "ruby", ".rs": "rust", -} - -# Language -> BinderHub base image. Pin a digest here once confirmed against the -# real image. Only python is verified today; other languages must be passed via -# --image until their binder-<lang>-base digests are added here. -BASE_IMAGES = { - # Current python base: redis-py 8.0.0 on Redis 8.2.2 (rebuilt 2026-06-19). - "python": ( - "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" - "binder-python-base@sha256:" - "bbb6b1f137115974f938f74acfcc50203565899343efe1dcfa5a72e48383f346" - ), -} - - -def detect_language(path): - return EXT_LANGUAGE.get(os.path.splitext(path)[1].lower()) - - -def resolve_image(path, override): - """Pick the base image: explicit --image wins, else map from source language.""" - if override: - return override - lang = detect_language(path) - image = BASE_IMAGES.get(lang) - if not image: - raise SystemExit( - f"No base image known for language {lang!r} ({path}). " - f"Pass --image, or add the digest to BASE_IMAGES." - ) - return image - -# Markers (Python comment prefix only, for this prototype).
-P = "#" -EXAMPLE, BINDER_ID, KERNEL_NAME = "EXAMPLE:", "BINDER_ID", "KERNEL_NAME" -HIDE_START, HIDE_END = "HIDE_START", "HIDE_END" -REMOVE_START, REMOVE_END = "REMOVE_START", "REMOVE_END" -STEP_START, STEP_END = "STEP_START", "STEP_END" - - -def _is(line, marker): - s = line.strip() - return s == f"{P} {marker}" or s == f"{P}{marker}" or s.startswith(f"{P} {marker} ") - - -def read_markers(path): - """Pull BINDER_ID / KERNEL_NAME from the source header (for reporting/targeting).""" - info = {"binder_id": None, "kernel_name": None} - with open(path, encoding="utf-8") as f: - for line in f: - s = line.strip() - if s.startswith(f"{P} {BINDER_ID} "): - info["binder_id"] = s.split(BINDER_ID, 1)[1].strip() - elif s.startswith(f"{P} {KERNEL_NAME} "): - info["kernel_name"] = s.split(KERNEL_NAME, 1)[1].strip() - return info - - -def parse_cells(path): - """Parse a source file into ordered cells. - - Returns a list of dicts: {"source": str, "step": str|None, "test": bool}. - - STEP blocks -> a cell carrying step metadata - - REMOVE blocks -> a cell tagged test=True (kept for verification, stripped on ship) - - everything else (incl. HIDE content) -> context cells (e.g. setup) - """ - with open(path, encoding="utf-8") as f: - lines = f.readlines() - - cells = [] - ctx, step_buf, rem_buf = [], [], [] - in_step, step_name, in_remove = False, None, False - - def flush_ctx(): - if any(ln.strip() for ln in ctx): - cells.append({"source": "".join(ctx).strip("\n"), - "step": None, "test": False}) - ctx.clear() - - for line in lines: - if _is(line, EXAMPLE) or _is(line, BINDER_ID) or _is(line, KERNEL_NAME): - continue - if _is(line, REMOVE_START): - # Flush an open step first so the REMOVE test cell lands *after* the - # step code that defines the variables its asserts reference. 
- if in_step and any(ln.strip() for ln in step_buf): - cells.append({"source": "".join(step_buf).strip("\n"), - "step": step_name, "test": False}) - step_buf = [] - else: - flush_ctx() - in_remove, rem_buf = True, [] - continue - if _is(line, REMOVE_END): - in_remove = False - if any(ln.strip() for ln in rem_buf): - cells.append({"source": "".join(rem_buf).strip("\n"), - "step": None, "test": True}) - continue - if in_remove: - rem_buf.append(line) - continue - if _is(line, HIDE_START) or _is(line, HIDE_END): - continue - if _is(line, STEP_START): - flush_ctx() - in_step, step_name, step_buf = True, line.split(STEP_START, 1)[1].strip(), [] - continue - if _is(line, STEP_END): - if any(ln.strip() for ln in step_buf): - cells.append({"source": "".join(step_buf).strip("\n"), - "step": step_name, "test": False}) - in_step, step_name = False, None - continue - (step_buf if in_step else ctx).append(line) - - flush_ctx() - return cells - - -def to_notebook(cells, include_tests): - """Build an nbformat-4 notebook dict. If include_tests is False, drop test cells.""" - nb_cells = [] - for c in cells: - if c["test"] and not include_tests: - continue - meta = {} - if c["step"]: - meta["step"] = c["step"] - if c["test"]: - meta["tags"] = ["test"] - nb_cells.append({ - "id": f"cell{len(nb_cells)}", - "cell_type": "code", - "metadata": meta, - "source": c["source"], - "outputs": [], - "execution_count": None, - }) - return { - "cells": nb_cells, - "metadata": { - "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, - "language_info": {"name": "python"}, - }, - "nbformat": 4, - "nbformat_minor": 5, - } - - # Kernel-less driver: exec each code cell in a shared namespace (same ordering # and shared-state semantics a Jupyter kernel gives), capturing per-cell stdout # and errors into the same executed-notebook shape report() expects. 
Used by @@ -265,66 +110,29 @@ def report(executed): def main(): - ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - ap.add_argument("source", nargs="?", help="example source .py") - ap.add_argument("--notebook", metavar="PATH", - help="verify a prebuilt .ipynb directly (e.g. jupyterize's " - "output) instead of re-parsing a source file; needs " - "--image") - ap.add_argument("--image", default=None, - help="base image (defaults to the source language's image)") + ap = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument("--notebook", metavar="PATH", required=True, + help="the .ipynb to execute and verify (e.g. jupyterize's " + "--with-tests output)") + ap.add_argument("--image", required=True, + help="base image to run the notebook in (the launcher " + "branch's Dockerfile FROM)") ap.add_argument("--mode", choices=["kernel", "script"], default="kernel", - help="kernel: real nbconvert (CI/amd64). " - "script: kernel-less exec (local/Apple Silicon).") - ap.add_argument("--ship", metavar="PATH", - help="also write the stripped (shipped) notebook here") + help="kernel: real nbconvert (CI/amd64). script: kernel-less " + "Python exec (local/Apple Silicon; Python notebooks only).") args = ap.parse_args() - # Notebook mode: execute a prebuilt notebook as-is (no re-parsing), so the - # exact artifact that ships is what gets verified. 
- if args.notebook: - if not args.image: - raise SystemExit("--image is required with --notebook") - with open(args.notebook, encoding="utf-8") as f: - test_nb = json.load(f) - img_name = args.image.split('@')[0].split('/')[-1] - print(f"Verifying {args.notebook} in {img_name} (mode={args.mode}) ...") - executed = execute_in_image(test_nb, args.image, args.mode) - ok, _ = report(executed) - print() - print("RESULT: PASS" if ok else "RESULT: FAIL") - return 0 if ok else 1 - - if not args.source: - raise SystemExit("provide a source file, or --notebook PATH") - image = resolve_image(args.source, args.image) - markers = read_markers(args.source) - cells = parse_cells(args.source) - n_test = sum(c["test"] for c in cells) - print(f"Source: {args.source}") - print(f"Language: {detect_language(args.source)} | " - f"target branch: {markers['binder_id'] or '(no BINDER_ID)'}") - print(f"Parsed {len(cells)} cells ({n_test} test, {len(cells)-n_test} shipped)") - - test_nb = to_notebook(cells, include_tests=True) - img_name = image.split('@')[0].split('/')[-1] - print(f"Executing test notebook in {img_name} (mode={args.mode}) ...") - executed = execute_in_image(test_nb, image, args.mode) - - ok, failures = report(executed) - - if args.ship: - os.makedirs(os.path.dirname(args.ship) or ".", exist_ok=True) - with open(args.ship, "w", encoding="utf-8") as f: - json.dump(to_notebook(cells, include_tests=False), f, indent=1) - print(f"Wrote shipped notebook -> {args.ship}") - + with open(args.notebook, encoding="utf-8") as f: + test_nb = json.load(f) + img_name = args.image.split('@')[0].split('/')[-1] + print(f"Verifying {args.notebook} in {img_name} (mode={args.mode}) ...") + executed = execute_in_image(test_nb, args.image, args.mode) + ok, _ = report(executed) print() - if ok: - print("RESULT: PASS — notebook executes clean and all asserts hold.") - return 0 - print(f"RESULT: FAIL — {len(failures)} cell(s) errored.") - return 1 + print("RESULT: PASS" if ok else "RESULT: FAIL") + 
return 0 if ok else 1 if __name__ == "__main__": From 75b7e67610c14f4c98c5aac50f904ba2d85e3bff Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 16:42:13 +0100 Subject: [PATCH 16/17] DOC-6763 still more from the Bugbot --- build/jupyterize/sync_notebook.py | 6 +++-- build/jupyterize/test_jupyterize.py | 26 ++++++++++++++++++ build/jupyterize/unwrapper.py | 41 ++++++++++++++++++++++++++--- 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py index 7d1c2f3d0f..9e0b642d01 100644 --- a/build/jupyterize/sync_notebook.py +++ b/build/jupyterize/sync_notebook.py @@ -32,9 +32,11 @@ # Default sibling clone: /binder-launchers next to /docs DEFAULT_REPO = os.path.normpath(os.path.join(HERE, "..", "..", "..", "binder-launchers")) +# Mirror of build/local_examples.py's EXTENSION_TO_LANGUAGE (kept local to avoid +# importing that module's heavy dependency chain). Keep the two in sync. EXT_LANGUAGE = { - ".py": "python", ".js": "node.js", ".go": "go", ".java": "java", - ".cs": "c#", ".php": "php", ".rb": "ruby", ".rs": "rust", + ".py": "python", ".js": "node.js", ".go": "go", ".c": "c", ".h": "c", + ".cs": "c#", ".java": "java", ".php": "php", ".rb": "ruby", ".rs": "rust", } # Base image used only when SCAFFOLDING A NEW branch. 
Existing branches keep diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index 63d078be8f..f24c611472 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -868,6 +868,7 @@ def main(): test_hide_remove_blocks() test_keep_tests_mode() test_trailing_brace_orphans() + test_orphan_braces_ignore_strings() test_javascript_file() # Edge case tests @@ -1320,6 +1321,31 @@ def test_trailing_brace_orphans(): os.unlink(output_file) +def test_orphan_braces_ignore_strings(): + """Braces inside string/char literals must not skew orphan-brace removal.""" + print("\nTesting orphan-brace removal ignores string-literal braces...") + + from unwrapper import _net_braces, _strip_trailing_orphan_braces + + # A '}' in a string/char/comment is not a structural brace. + assert _net_braces('System.out.println("}");') == 0 + assert _net_braces("char c = '}';") == 0 + assert _net_braces('x = 1; // closes the } block') == 0 + # Real structural imbalance is still counted. + assert _net_braces('foo();\n}\n}') == 2 + + # A balanced loop whose body prints a '}' keeps its own closing brace. + balanced = 'for (int i = 0; i < 2; i++) {\n print("}");\n}' + assert _strip_trailing_orphan_braces(balanced) == balanced + + # Genuine orphan wrapper closes (with a string brace earlier) are stripped, + # but only the unmatched ones. 
+ orphan = 'print("}");\njedis.close();\n}\n}' + assert _strip_trailing_orphan_braces(orphan) == 'print("}");\njedis.close();' + + print("✓ Orphan-brace string-literal test passed") + + if __name__ == '__main__': sys.exit(main()) diff --git a/build/jupyterize/unwrapper.py b/build/jupyterize/unwrapper.py index 1c79c5e508..260bdb597a 100644 --- a/build/jupyterize/unwrapper.py +++ b/build/jupyterize/unwrapper.py @@ -141,6 +141,40 @@ def _remove_trailing_braces(code, count): return '\n'.join(result) +def _net_braces(code): + """ + Return ('}' count) - ('{' count), IGNORING braces inside string/char + literals ('...', "...", `...`) and line comments (# or //). A scanner rather + than a raw count, so a brace in a string (e.g. a JSON literal) doesn't skew + the balance and cause a real closing brace to be stripped. + """ + net = 0 + i, n = 0, len(code) + quote = None + while i < n: + ch = code[i] + if quote: + if ch == '\\' and quote != '`': + i += 2 + continue + if ch == quote: + quote = None + i += 1 + continue + if ch in ('"', "'", '`'): + quote = ch + elif ch == '#' or (ch == '/' and i + 1 < n and code[i + 1] == '/'): + while i < n and code[i] != '\n': # skip to end of line comment + i += 1 + continue + elif ch == '{': + net -= 1 + elif ch == '}': + net += 1 + i += 1 + return net + + def _strip_trailing_orphan_braces(code): """ Strip orphan closing braces left when a class/method wrapper's opening was @@ -148,10 +182,11 @@ def _strip_trailing_orphan_braces(code): Only CONTIGUOUS trailing lone-'}' lines are removed (stopping at the first real content line), and at most as many as the cell has unmatched closes - (`}` minus `{`). This preserves the closing braces of balanced blocks - (for/foreach/lambda bodies) that legitimately sit inside the cell. + (counted by _net_braces, which ignores braces in strings/comments). This + preserves the closing braces of balanced blocks (for/foreach/lambda bodies) + that legitimately sit inside the cell. 
""" - net = code.count('}') - code.count('{') + net = _net_braces(code) if net <= 0: return code From 49c9bdeacd9b713f0f399b0596ec1d150d5053e5 Mon Sep 17 00:00:00 2001 From: Andy Stark Date: Fri, 19 Jun 2026 16:58:21 +0100 Subject: [PATCH 17/17] DOC_6763 more de la Bugbot --- build/jupyterize/sync_notebook.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py index 9e0b642d01..0cedad7722 100644 --- a/build/jupyterize/sync_notebook.py +++ b/build/jupyterize/sync_notebook.py @@ -126,8 +126,8 @@ docker run --rm -v "${PWD}:/work" "${BASE}" bash -c ' cd /usr/src/redis-src && ./redis-server ./redis.conf --daemonize yes >/dev/null 2>&1 && sleep 1 cd /work && jupyter nbconvert --to notebook --execute \\ - --ExecutePreprocessor.startup_timeout=120 \\ - --ExecutePreprocessor.timeout=120 \\ + --ExecutePreprocessor.startup_timeout=300 \\ + --ExecutePreprocessor.timeout=300 \\ --output /tmp/executed.ipynb demo.test.ipynb' call-reusable-workflow: @@ -370,8 +370,10 @@ def main(): print(f"Committed to '{branch}'.") if args.push: - git(repo, "push", "--quiet", - *(["-u", "origin", branch] if is_new else []), capture=False) + # Always target origin/ explicitly and (re)set the upstream. + # A scaffolded branch is created from origin/main, so a plain `git push` + # would otherwise follow that upstream and fail or push to the wrong ref. + git(repo, "push", "--quiet", "-u", "origin", branch, capture=False) print(f"Pushed '{branch}' to origin.") else: print("Not pushed (use --push). Review the commit, then push when ready.")