diff --git a/build/jupyterize/config.py b/build/jupyterize/config.py index 4f35ae8333..8ac91a2cde 100644 --- a/build/jupyterize/config.py +++ b/build/jupyterize/config.py @@ -25,8 +25,10 @@ } }, 'node.js': { - 'name': 'javascript', - 'display_name': 'JavaScript (Node.js)', + # 'jslab' is the kernel the binder-nodejs-base image actually installs + # (from the tslab package); the older 'javascript' name is not present. + 'name': 'jslab', + 'display_name': 'JavaScript', 'language': 'javascript', 'language_info': { 'name': 'javascript', @@ -36,7 +38,8 @@ } }, 'go': { - 'name': 'gophernotes', + # binder-go-base installs GoNB (kernel name 'gonb'), not gophernotes. + 'name': 'gonb', 'display_name': 'Go', 'language': 'go', 'language_info': { diff --git a/build/jupyterize/js-notebook-findings.md b/build/jupyterize/js-notebook-findings.md new file mode 100644 index 0000000000..a545c8851c --- /dev/null +++ b/build/jupyterize/js-notebook-findings.md @@ -0,0 +1,149 @@ +# JavaScript (node-redis) notebook findings + +Investigation date: 2026-06-19. Context: extending the jupyterize → verify → +binder-launchers pipeline beyond Python, starting with node-redis (the +time-series tutorial, `dt-time-series.js` from the node-redis doctests). + +## TL;DR + +- **jupyterize itself handles JS fine.** node-redis examples are flat + top-level-`await` scripts with `//` markers — no unwrapping needed, just like + Python. Generation (ship + test notebooks, asserts retained) works. +- **One real jupyterize bug fixed:** it emitted kernel name `javascript`, but + the `binder-nodejs-base` image has no such kernel — its JS kernel is `jslab` + (from the `tslab` package). Changed `config.py` node.js → `jslab`. (Every + existing `nodejs-*` notebook declares the non-existent `javascript`, so they + are mis-kernel'd for automated execution too.) 
+- **The blocker is the kernel (`tslab`/`jslab`), not jupyterize.** It is a poor + fit for the automated nbconvert assert-gate, for two compounding reasons + (below). Recommended path: verify non-Python examples via their **native + harness** (`node script.js`), and treat notebook-kernel execution as a lighter + "does it run/display in Binder" check. + +## Environment facts (binder-nodejs-base @sha256:8c3563d8…) + +- JS kernels installed: `jslab` and `tslab` (both from the `tslab` npm package). + `jslab` = `tslab kernel --js`. There is **no** `javascript` or `ijavascript` + kernel. +- `jslab` **does** start and execute under amd64 emulation on Apple Silicon + (unlike Python's ipykernel, which hangs under qemu). So local JS testing is + possible — but see the reliability caveats below. +- node-redis is **v5.12.1** (ESM), installed at `/home/jovyan/node_modules` with + empty `NODE_PATH`. Node only resolves `redis` when the notebook runs from + `/home/jovyan` (which Binder does via `ADD demo.ipynb .` → WORKDIR). A verify + step that runs from elsewhere must `cd /home/jovyan` or set `NODE_PATH`. + +## Blocker 1 — tslab hardcodes type-checking + +`tslab` runs the TypeScript compiler over each JS cell and sets +`checkJs: true` **in code** (`converter.js:~221`), passed directly to the +compiler — it is **not** merged with any user `tsconfig.json`. Execution is +gated on pre-emit diagnostics (`converter.js:272`). + +Consequence: node-redis v5's heavily-generic return types don't survive tslab's +**cross-cell `.d.ts` declaration emission** (each cell's vars are emitted to a +declaration file the next cell imports). They collapse to `string`, so e.g. +`info.totalSamples` / `res.sourceKey` fail with *"Property does not exist on +type 'string'"* and the cell never runs. + +Things that do **not** fix it: +- A `tsconfig.json` with `checkJs:false` — ignored (hardcoded value wins). 
+- `// @ts-nocheck` per cell — the bad type lives in the emitted dependency + `.d.ts`, not the annotated cell. + +What does get past it: patching the vendored file in the image +(`sed -i 's/checkJs: true/checkJs: false/' …/tslab/dist/converter.js`, needs +root at build time). Cells then execute. But that exposes Blocker 2. + +## Blocker 2 — tslab's error reporting through nbconvert is unreliable + +With `checkJs:false`, runtime behaviour through nbconvert is inconsistent: +- A **standalone failing assert** correctly raises `CellExecutionError` + (nbconvert exits non-zero) — so the gate *can* catch errors. +- But the **correct** full notebook *also* fails without `--allow-errors` + (some cell returns an error-status reply), while *with* `--allow-errors` it + shows **no error outputs at all** and step cells emit **no stdout**. + +So there is no clean "good → pass / broken → fail" signal from tslab+nbconvert. +This is the real reason node verification via the notebook kernel isn't viable +as-is. It is a tslab limitation; Python's ipykernel (the reference kernel) +reports errors and outputs cleanly, which is why Python "just worked". + +## Recommendation for non-Python verification + +Verify the example in its **native test harness** rather than through the +notebook kernel. The source files *are* the client repos' doctests, designed to +run as `node script.js` / `go test` / etc., where asserts gate via process exit. +Split the two concerns: + +1. **Correctness gate** = native runner (reliable assert gating). +2. **Notebook check** = "executes/displays in the Binder kernel" (lighter; for + JS still needs the `checkJs:false` image patch so tslab doesn't reject valid + JS). + +jupyterize (generation) is unaffected and remains the deterministic core. + +## Cross-client probe (2026-06-19): which kernels gate? + +Ran a two-question probe (does jupyterize's kernel name match the image; does a +deliberately-failing cell gate through nbconvert) against all four base images. 
+ +| Client | Kernel (image) | jupyterize name | Name OK? | Good cell runs? | Failing cell gates? | +|--------|----------------|-----------------|----------|-----------------|---------------------| +| Python (redis-py) | ipykernel `python3` | `python3` | ✓ | ✓ | ✓ exit 1, `error` output | +| Java (Jedis) | IJava `java` | `java` | ✓ | ✓ (prints 42) | ✓ exit 1, `EvalException` | +| C# (NRedisStack) | .NET Interactive `.net-csharp` | `.net-csharp` | ✓ | ✓ (prints 42) | ✓ exit 1, `Error` | +| Go (go-redis) | GoNB `gonb` | ~~`gophernotes`~~ → fixed to `gonb` | was ✗, now ✓ | ✓ (prints 42) | ✗ panic → stream, **exit 0** | +| Node (node-redis) | tslab `jslab` | ~~`javascript`~~ → fixed to `jslab` | was ✗, now ✓ | ✓ | ✗ error → stream, **exit 0** | + +**Pattern:** in-process kernels (IPython, JShell/IJava, .NET Interactive) raise +proper Jupyter `error` messages, so the nbconvert assert-gate works. Kernels +that compile-and-run a subprocess (tslab→node, GoNB→go) capture the subprocess +stderr as a *stream* and don't propagate failure status — so the gate is hollow. + +**Implications:** +- **Java & C#**: the notebook-kernel verify gate works, same as Python. Remaining + risk is jupyterize's regex *unwrapper* (these examples are wrapped in + class/method scaffolding, unlike the flat Python/Node scripts) — a + generation-correctness question, not a kernel one. +- **Go & Node**: notebook-kernel gating does not work. Verify via the native + harness (`go test`, `node script.js`) instead; treat notebook execution as a + lighter "displays/runs in Binder" check (Node also needs the tslab + `checkJs:false` image patch). +- Kernel-name fixes applied in `config.py`: node.js → `jslab`, go → `gonb`. + +## Java / Jedis end-to-end attempt (2026-06-19) + +Ran the time-series example (`TimeSeriesTutorialExample.java` from the jedis +doctests) through the full workflow. Two findings: + +1. 
**The unwrapper works well.** From a `public class { @Test public void run() + { … } }` wrapper, jupyterize correctly stripped the class / `@Test` / method / + `package` lines and the junit asserts, and hoisted the real imports + (`RedisClient`, `timeseries.*`, `java.util.*`) to the top — producing clean + flat JShell statements matching the existing `jedis-dt-list` notebook shape. +2. **Two real issues:** + - **Blocker — jedis version lag.** binder-java-base ships **jedis 5.1.0** + (has `UnifiedJedis`, NOT `RedisClient`). The example uses `RedisClient` + (jedis 6.x), so cell 0's `import redis.clients.jedis.RedisClient` fails with + "cannot find symbol" and cascades to all cells. Needs a base-image jedis + bump to 6.x — same shape as the Python AR*/redis-py version lags. + - **Unwrapper bug — trailing close braces.** The wrapper's closing `}` (method) + and `}` (class) are in the LAST cell, but jupyterize unwraps each cell + independently and the opening `{`s are in cell 0 — so its brace-balancing + can't pair them, and the final cell keeps `}\n}`. This breaks the last + cell's compile even after a jedis bump. Affects all wrapped languages + (Java/C#/Go). Go's config has a `closing_braces` pattern that strips + orphan `}` lines; Java/C# need the same (or a global trailing-brace pass). + +Net: Jedis is pipeline-ready *pending* (a) a base-image jedis 6.x bump and (b) +the trailing-brace unwrapper fix. The hard parts — unwrapping and IJava error +gating — are sound. Branch NOT created (would be red on both counts). + +## Open questions + +- Why was `ijavascript` rejected? If those reasons don't extend to a **Deno** + Jupyter kernel, Deno runs JS/TS without tslab's checking quirks and may be a + cleaner kernel choice. +- Whether to patch `checkJs:false` into `binder-nodejs-base` regardless, since + notebooks won't even *display*/run in the kernel without it. 
diff --git a/build/jupyterize/jupyterize.py b/build/jupyterize/jupyterize.py index 6b783f1413..1cc7989aa1 100755 --- a/build/jupyterize/jupyterize.py +++ b/build/jupyterize/jupyterize.py @@ -26,7 +26,7 @@ -def jupyterize(input_file, output_file=None, verbose=False): +def jupyterize(input_file, output_file=None, verbose=False, with_tests=False): """ Convert code example file to Jupyter notebook. @@ -34,6 +34,8 @@ def jupyterize(input_file, output_file=None, verbose=False): input_file: Path to input file output_file: Path to output file (default: same name with .ipynb extension) verbose: Enable verbose logging + with_tests: When True, keep REMOVE blocks as cells tagged 'test' (for a + verification/test notebook) instead of dropping them. Returns: str: Path to output file @@ -61,7 +63,7 @@ def jupyterize(input_file, output_file=None, verbose=False): validator.validate_file(input_file, language) # Parse file - parser = FileParser(language) + parser = FileParser(language, keep_tests=with_tests) parsed_blocks = parser.parse(input_file) if not parsed_blocks: @@ -122,13 +124,21 @@ def main(): help='Enable verbose logging' ) + parser.add_argument( + '--with-tests', + action='store_true', + help="Keep REMOVE blocks as cells tagged 'test' (for a verification " + "notebook) instead of dropping them" + ) + args = parser.parse_args() try: output_file = jupyterize( args.input_file, args.output_file, - args.verbose + args.verbose, + args.with_tests ) print(f"Successfully created: {output_file}") return 0 diff --git a/build/jupyterize/notebook_builder.py b/build/jupyterize/notebook_builder.py index 8c0c77b832..14d1f94377 100644 --- a/build/jupyterize/notebook_builder.py +++ b/build/jupyterize/notebook_builder.py @@ -121,6 +121,11 @@ def _create_cells(self, parsed_blocks): else: logging.debug(f"Created cell {i} (preamble)") + # Tag test cells (from REMOVE blocks in keep_tests mode) so they can + # be executed for verification and stripped before shipping. 
+ if block.get('is_test'): + cell.metadata['tags'] = ['test'] + cells.append(cell) logging.info(f"Created {len(cells)} notebook cells") @@ -139,6 +144,11 @@ def _create_notebook(self, cells): nb = new_notebook() nb.cells = cells + # Deterministic cell ids: nbformat assigns random ids otherwise, which + # would make every regeneration of an unchanged example produce a diff. + for i, cell in enumerate(nb.cells): + cell['id'] = f"cell{i}" + # Set kernel metadata kernel_spec = get_kernel_spec(self.language) diff --git a/build/jupyterize/parser.py b/build/jupyterize/parser.py index 8cc8500ae5..9baa12254c 100644 --- a/build/jupyterize/parser.py +++ b/build/jupyterize/parser.py @@ -39,15 +39,20 @@ def _check_marker(line, prefix, marker): class FileParser: """Parses source files with special comment markers.""" - def __init__(self, language): + def __init__(self, language, keep_tests=False): """ Initialize parser for a specific language. Args: language: Programming language (e.g., 'python', 'c#') + keep_tests: When True, REMOVE blocks are emitted as cells tagged + 'test' (in source order) instead of being dropped. Used to + build a test notebook whose asserts can be executed; strip the + tagged cells (e.g. nbconvert TagRemovePreprocessor) to ship. """ self.language = language self.prefix = PREFIXES[language.lower()] + self.keep_tests = keep_tests def parse(self, file_path): """ @@ -63,11 +68,12 @@ def parse(self, file_path): lines = f.readlines() # State tracking - in_remove = False + remove_depth = 0 in_step = False step_name = None step_lines = [] preamble_lines = [] + remove_lines = [] cells = [] seen_step_names = set() @@ -83,22 +89,47 @@ def parse(self, file_path): logging.debug(f"Line {line_num}: Skipping BINDER_ID marker") continue - # Handle REMOVE blocks + # Handle REMOVE blocks. Nested markers are absorbed into the + # outer block (track depth) so a nested REMOVE_START doesn't discard + # the lines collected for the outer block. 
if _check_marker(line, self.prefix, REMOVE_START): - if in_remove: + if remove_depth > 0: logging.warning(f"Line {line_num}: Nested REMOVE_START detected") - in_remove = True + remove_depth += 1 + continue + if self.keep_tests: + # Flush pending code first so the test cell lands *after* + # the code it checks (asserts reference its variables). + if in_step and step_lines: + cells.append({'code': ''.join(step_lines), + 'step_name': step_name, 'is_test': False}) + step_lines = [] + elif preamble_lines: + cells.append({'code': ''.join(preamble_lines), + 'step_name': None, 'is_test': False}) + preamble_lines = [] + remove_lines = [] + remove_depth = 1 logging.debug(f"Line {line_num}: Entering REMOVE block") continue if _check_marker(line, self.prefix, REMOVE_END): - if not in_remove: + if remove_depth == 0: logging.warning(f"Line {line_num}: REMOVE_END without REMOVE_START") - in_remove = False + continue + remove_depth -= 1 + if remove_depth > 0: + continue # closing a nested block; keep collecting + if self.keep_tests and remove_lines: + cells.append({'code': ''.join(remove_lines), + 'step_name': None, 'is_test': True}) + remove_lines = [] logging.debug(f"Line {line_num}: Exiting REMOVE block") continue - if in_remove: + if remove_depth > 0: + if self.keep_tests: + remove_lines.append(line) continue # Skip HIDE markers (but include content) @@ -170,7 +201,7 @@ def parse(self, file_path): logging.debug(f"Saved final preamble cell ({len(preamble_lines)} lines)") # Check for unclosed blocks - if in_remove: + if remove_depth > 0: logging.warning("File ended with unclosed REMOVE block") if in_step: logging.warning("File ended with unclosed STEP block") diff --git a/build/jupyterize/sync_notebook.py b/build/jupyterize/sync_notebook.py new file mode 100644 index 0000000000..0cedad7722 --- /dev/null +++ b/build/jupyterize/sync_notebook.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python3 +""" +sync_notebook.py - generate notebook(s) from an example source and sync them to +the 
matching binder-launchers branch. + +Deterministic glue for the docs -> binder-launchers pipeline: + changed source file -> read BINDER_ID -> jupyterize ship + test notebooks + -> create/update the binder-launchers branch -> (optionally) push. + +Verification happens downstream in the binder-launchers verify gate; this script +ALSO runs a local pre-check (verify.py) and refuses to commit a notebook whose +asserts don't pass, so a broken example never gets synced. + +For an EXISTING branch it updates demo.ipynb + demo.test.ipynb and upgrades the +workflow/.dockerignore to the verify-gated versions, but leaves the Dockerfile +(and its pinned base-image digest) untouched. For a NEW branch it scaffolds the +full set, pinning the base image from LANG_BASE_IMAGE. + +Usage: + python build/jupyterize/sync_notebook.py [--repo PATH] + [--push] [--dry-run] [--no-verify] [--mode script|kernel] +""" + +import argparse +import os +import subprocess +import sys + +HERE = os.path.dirname(os.path.abspath(__file__)) +JUPYTERIZE = os.path.join(HERE, "jupyterize.py") +VERIFY = os.path.join(HERE, "verify.py") +# Default sibling clone: /binder-launchers next to /docs +DEFAULT_REPO = os.path.normpath(os.path.join(HERE, "..", "..", "..", "binder-launchers")) + +# Mirror of build/local_examples.py's EXTENSION_TO_LANGUAGE (kept local to avoid +# importing that module's heavy dependency chain). Keep the two in sync. +EXT_LANGUAGE = { + ".py": "python", ".js": "node.js", ".go": "go", ".c": "c", ".h": "c", + ".cs": "c#", ".java": "java", ".php": "php", ".rb": "ruby", ".rs": "rust", +} + +# Base image used only when SCAFFOLDING A NEW branch. Existing branches keep +# their own FROM line. Pin a digest here once confirmed for that language. 
+LANG_BASE_IMAGE = { + "python": ( + "us-central1-docker.pkg.dev/redis-learning-378123/binderhub/" + "binder-python-base@sha256:" + "bbb6b1f137115974f938f74acfcc50203565899343efe1dcfa5a72e48383f346" + ), +} + +# Languages whose Jupyter kernel surfaces runtime errors as proper Jupyter +# errors, so the notebook verify gate (and the local pre-check) actually catch +# failing asserts. Compile-and-subprocess kernels (Go gonb, Node jslab) report +# runtime errors as stream output and can exit 0 - the gate only catches their +# compile/import errors, so verify those clients via a native harness. +GATING_LANGUAGES = {"python", "java", "c#"} + +# verify.py's kernel-less --mode script driver executes Python; it is only valid +# for Python notebooks. Other languages must use --mode kernel. +SCRIPT_MODE_LANGUAGES = {"python"} + +DOCKERIGNORE = "Dockerfile\ngha-creds*\ndemo.test.ipynb\n" + +README = ( + "# Binder Launchers\n\n" + "This branch contains a Jupyter notebook environment that builds on a\n" + "pre-built Redis-enabled base image. The notebook is generated from the\n" + "matching example source in the redis/docs repo - do not edit it by hand;\n" + "regenerate it with build/jupyterize/sync_notebook.py.\n" +) + +# Verify-before-deploy workflow (gates the reusable build-and-deploy on a +# successful execution of demo.test.ipynb against the branch's base image). +WORKFLOW = """name: Build and deploy binder images + +on: + push: + branches-ignore: + - main + paths: + - 'Dockerfile' + - 'demo.ipynb' + - 'demo.test.ipynb' + - '.github/workflows/main.yml' + +jobs: + # Gate: execute the test notebook (which still contains the REMOVE-block + # asserts) inside the exact base image this branch ships on, before deploy. + # GitHub runners are amd64, so the Jupyter kernel runs natively. + # + # NOTE: this reliably gates failing asserts only for IN-PROCESS kernels + # (Python/Java/C#), which surface errors as Jupyter error messages. 
Compile- + # and-subprocess kernels (Go `gonb`, Node `jslab`) report runtime errors as + # stream output and can still exit 0, so for those this catches compile/import + # errors but NOT runtime assert failures - verify those with a native harness + # (`go test`, `node script.js`). See build/jupyterize/js-notebook-findings.md. + verify: + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - name: 'Checkout' + uses: 'actions/checkout@v4' + + - name: 'Google auth' + uses: 'google-github-actions/auth@v2' + with: + project_id: '${{ secrets.PROJECT_ID }}' + service_account: '${{ secrets.SERVICE_ACCOUNT }}' + workload_identity_provider: '${{ secrets.WORKLOAD_IDENTITY_PROVIDER }}' + + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v2' + with: + project_id: '${{ secrets.PROJECT_ID }}' + + - name: 'Execute test notebook against the base image' + run: |- + set -euo pipefail + gcloud auth configure-docker us-central1-docker.pkg.dev --quiet + BASE=$(awk '/^FROM/ {print $2; exit}' Dockerfile) + echo "Verifying demo.test.ipynb against base image: ${BASE}" + docker pull "${BASE}" + docker run --rm -v "${PWD}:/work" "${BASE}" bash -c ' + cd /usr/src/redis-src && ./redis-server ./redis.conf --daemonize yes >/dev/null 2>&1 && sleep 1 + cd /work && jupyter nbconvert --to notebook --execute \\ + --ExecutePreprocessor.startup_timeout=300 \\ + --ExecutePreprocessor.timeout=300 \\ + --output /tmp/executed.ipynb demo.test.ipynb' + + call-reusable-workflow: + needs: verify + uses: redis/binder-launchers/.github/workflows/build-and-deploy.yml@main + with: + branch_name: ${{ github.ref_name }} + secrets: inherit +""" + + +def fail(msg): + print(f"ERROR: {msg}", file=sys.stderr) + sys.exit(1) + + +def git(repo, *args, capture=True): + return subprocess.run( + ["git", "-C", repo, *args], + check=True, text=True, + capture_output=capture, + ) + + +def detect_language(path): + return EXT_LANGUAGE.get(os.path.splitext(path)[1].lower()) 
+ + +def read_binder_id(path): + """Read the BINDER_ID marker (works for # and // comment prefixes).""" + with open(path, encoding="utf-8") as f: + for line in f: + s = line.strip() + for pre in ("#", "//"): + if s.startswith(f"{pre} BINDER_ID "): + return s.split("BINDER_ID", 1)[1].strip() + return None + + +def remote_branch_exists(repo, branch): + r = subprocess.run( + ["git", "-C", repo, "ls-remote", "--heads", "origin", branch], + capture_output=True, text=True, + ) + return bool(r.stdout.strip()) + + +def local_branch_exists(repo, branch): + r = subprocess.run( + ["git", "-C", repo, "rev-parse", "--verify", "--quiet", + f"refs/heads/{branch}"], + capture_output=True, text=True, + ) + return r.returncode == 0 + + +def commits_ahead(repo, branch): + """How many commits local `branch` has that origin/`branch` does not.""" + r = subprocess.run( + ["git", "-C", repo, "rev-list", "--count", + f"origin/{branch}..{branch}"], + capture_output=True, text=True, + ) + return int(r.stdout.strip() or 0) if r.returncode == 0 else 0 + + +def local_verify(notebook, mode, image): + """Execute the generated notebook in the base image; True if it passes.""" + print(f"\n--- Local pre-check: verify.py --notebook --mode {mode} ---") + cmd = [sys.executable, VERIFY, "--notebook", notebook, "--mode", mode] + if image: + cmd += ["--image", image] + r = subprocess.run(cmd) + return r.returncode == 0 + + +def restore_clone(repo, orig_branch, target, is_new): + """Return the binder-launchers clone to its pre-run clean state (used on + non-committing exits so a dry-run/failed sync doesn't block the next run).""" + git(repo, "reset", "--hard", "--quiet") + subprocess.run(["git", "-C", repo, "clean", "-fdq"], check=False) + if orig_branch and orig_branch != target: + git(repo, "switch", "--quiet", orig_branch) + if is_new: + subprocess.run(["git", "-C", repo, "branch", "-D", target], + capture_output=True) + + +def read_from(dockerfile): + """Return the image ref on the Dockerfile's FROM 
line.""" + with open(dockerfile, encoding="utf-8") as f: + for line in f: + if line.strip().startswith("FROM "): + return line.strip().split(None, 1)[1].strip() + return None + + +def jupyterize(source, out_path, with_tests=False): + cmd = [sys.executable, JUPYTERIZE, source, "-o", out_path] + if with_tests: + cmd.append("--with-tests") + r = subprocess.run(cmd, capture_output=True, text=True) + if r.returncode != 0: + fail("jupyterize failed (is nbformat installed in this env?):\n" + + (r.stderr or r.stdout)) + + +def write(path, content): + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + f.write(content) + + +def main(): + ap = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + ap.add_argument("source", help="example source file under local_examples/") + ap.add_argument("--repo", default=DEFAULT_REPO, + help=f"binder-launchers clone (default: {DEFAULT_REPO})") + ap.add_argument("--push", action="store_true", + help="push the branch to origin after committing") + ap.add_argument("--dry-run", action="store_true", + help="generate + verify + write files, but do not commit/push") + ap.add_argument("--no-verify", action="store_true", + help="skip the local verify.py pre-check") + ap.add_argument("--mode", choices=["kernel", "script", "auto"], + default="auto", + help="verify mode for the local pre-check. 
'auto' (default) " + "picks script for Python, kernel for other languages " + "(the script driver only runs Python).") + args = ap.parse_args() + + source = os.path.abspath(args.source) + repo = os.path.abspath(args.repo) + if not os.path.isfile(source): + fail(f"source not found: {source}") + if not os.path.isdir(os.path.join(repo, ".git")): + fail(f"not a git repo: {repo}") + + language = detect_language(source) + branch = read_binder_id(source) + if not branch: + fail(f"no BINDER_ID marker in {source}; cannot determine target branch") + print(f"Source: {source}") + print(f"Language: {language} Target branch: {branch}") + + # Resolve the pre-check mode: the kernel-less script driver only runs Python. + mode = args.mode + if mode == "auto": + mode = "script" if language in SCRIPT_MODE_LANGUAGES else "kernel" + elif mode == "script" and language not in SCRIPT_MODE_LANGUAGES: + fail(f"--mode script only works for Python; {language!r} needs " + f"--mode kernel (the script driver executes Python).") + + # Warn when the kernel can't gate runtime errors (asserts won't fail CI). + if language not in GATING_LANGUAGES: + print(f"WARNING: {language}'s kernel reports runtime errors as stream " + f"output, so neither this pre-check nor the CI gate catches " + f"failing asserts (only compile/import errors). Verify {language} " + f"examples with a native harness.") + + # Guard against clobbering work in the binder-launchers clone. + status = git(repo, "status", "--porcelain").stdout.strip() + if status: + fail(f"binder-launchers working tree is dirty; commit/stash first:\n{status}") + + # Remember where the clone started so non-committing exits can restore it. 
+ orig_branch = git(repo, "rev-parse", "--abbrev-ref", "HEAD").stdout.strip() + git(repo, "fetch", "--quiet", "origin") + on_origin = remote_branch_exists(repo, branch) + on_local = local_branch_exists(repo, branch) + is_new = not (on_origin or on_local) + + if is_new: + print(f"Branch '{branch}' does not exist -> scaffolding a new one.") + base_image = LANG_BASE_IMAGE.get(language) + if not base_image: + fail(f"no base image known for language {language!r}; " + f"add it to LANG_BASE_IMAGE to scaffold new {language} branches") + git(repo, "switch", "--quiet", "-c", branch, "origin/main") + write(os.path.join(repo, "Dockerfile"), + f"FROM {base_image}\nADD demo.ipynb .\n") + write(os.path.join(repo, "README.md"), README) + else: + print(f"Branch '{branch}' exists -> updating (Dockerfile preserved).") + # Check out the branch (creating a local ref from origin if needed). + if on_local: + git(repo, "switch", "--quiet", branch) + else: + git(repo, "switch", "--quiet", "-c", branch, f"origin/{branch}") + # Re-sync to origin, but never silently drop local commits not on origin. + if on_origin: + ahead = commits_ahead(repo, branch) + if ahead: + fail(f"branch '{branch}' has {ahead} local commit(s) not on " + f"origin; push or discard them before syncing") + git(repo, "reset", "--hard", "--quiet", f"origin/{branch}") + base_image = read_from(os.path.join(repo, "Dockerfile")) + if not base_image: + fail("could not read FROM line from the branch Dockerfile") + + # Generate notebooks straight into the branch working tree. + test_nb_path = os.path.join(repo, "demo.test.ipynb") + jupyterize(source, os.path.join(repo, "demo.ipynb"), with_tests=False) + jupyterize(source, test_nb_path, with_tests=True) + + # Local pre-check against the branch's ACTUAL base image, run on the + # GENERATED test notebook (not a re-parse), so the exact artifact that ships + # is what gets verified. Refuse to sync if its asserts don't pass. 
+ if not args.no_verify: + if not local_verify(test_nb_path, mode, base_image): + restore_clone(repo, orig_branch, branch, is_new) + fail("local verification failed - not syncing") + print("--- Local pre-check PASSED ---") + + # Always (re)apply the verify gate + ignore rules so existing plain branches + # get upgraded too. Dockerfile is left as-is for existing branches. + write(os.path.join(repo, ".github", "workflows", "main.yml"), WORKFLOW) + write(os.path.join(repo, ".dockerignore"), DOCKERIGNORE) + + git(repo, "add", "-A") + diff = git(repo, "status", "--porcelain").stdout.strip() + if not diff: + print("\nNothing changed - branch already up to date.") + restore_clone(repo, orig_branch, branch, is_new) + return 0 + print(f"\nChanges staged on '{branch}':\n{diff}") + + if args.dry_run: + restore_clone(repo, orig_branch, branch, is_new) + print("\n[dry-run] not committing or pushing; clone restored.") + return 0 + + msg = (f"Sync {os.path.basename(source)} notebook via jupyterize\n\n" + f"Generated demo.ipynb + demo.test.ipynb from the docs example " + f"source and {'scaffolded' if is_new else 'updated'} this branch.") + git(repo, "commit", "--quiet", "-m", msg) + print(f"Committed to '{branch}'.") + + if args.push: + # Always target origin/ explicitly and (re)set the upstream. + # A scaffolded branch is created from origin/main, so a plain `git push` + # would otherwise follow that upstream and fail or push to the wrong ref. + git(repo, "push", "--quiet", "-u", "origin", branch, capture=False) + print(f"Pushed '{branch}' to origin.") + else: + print("Not pushed (use --push). 
Review the commit, then push when ready.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/build/jupyterize/test_jupyterize.py b/build/jupyterize/test_jupyterize.py index 5563fb532a..f24c611472 100644 --- a/build/jupyterize/test_jupyterize.py +++ b/build/jupyterize/test_jupyterize.py @@ -866,6 +866,9 @@ def main(): test_language_detection() test_basic_conversion() test_hide_remove_blocks() + test_keep_tests_mode() + test_trailing_brace_orphans() + test_orphan_braces_ignore_strings() test_javascript_file() # Edge case tests @@ -1185,6 +1188,164 @@ def test_csharp_for_loop_braces(): os.unlink(output_file) +def test_keep_tests_mode(): + """Test that --with-tests keeps REMOVE blocks as tagged 'test' cells.""" + print("\nTesting keep-tests (test notebook) mode...") + + test_content = """# EXAMPLE: test_keep +# HIDE_START +import redis +r = redis.Redis() +# HIDE_END + +# REMOVE_START +r.delete("k") +# REMOVE_END + +# STEP_START setit +res = r.set("k", "v") +print(res) +# STEP_END +# REMOVE_START +assert res is True +# REMOVE_END +""" + + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + f.write(test_content) + test_file = f.name + + ship_file = test_file.replace('.py', '.ipynb') + test_nb_file = test_file.replace('.py', '.test.ipynb') + try: + # Default (ship) mode: REMOVE content excluded, no test tags. + jupyterize(test_file, ship_file, verbose=False) + with open(ship_file) as f: + ship = json.load(f) + ship_src = ''.join(''.join(c['source']) for c in ship['cells']) + assert 'assert res is True' not in ship_src + assert 'r.delete' not in ship_src + assert all('test' not in c['metadata'].get('tags', []) for c in ship['cells']) + + # With tests: REMOVE blocks kept as cells tagged 'test', in order. 
def test_trailing_brace_orphans():
    """Orphan wrapper close-braces in a later cell are stripped, while the
    braces of balanced blocks in the same example are left alone."""
    print("\nTesting orphan trailing-brace removal across cells...")

    # The class/method wrapper opens in the first cell; a teardown statement and
    # the wrapper's closing braces land in a trailing context cell (not a
    # braces-only cell, so it isn't skipped). A balanced for-loop sits in a step.
    test_content = """// EXAMPLE: test_trailing_braces
import redis.clients.jedis.UnifiedJedis;

public class TrailingBraceExample {
    public void run() {
        UnifiedJedis jedis = new UnifiedJedis("redis://localhost:6379");

        // STEP_START loop
        for (int i = 0; i < 2; i++) {
            System.out.println(i);
        }
        // STEP_END

        jedis.close();
    }
}
"""

    def cell_text(cell):
        return ''.join(cell['source'])

    with tempfile.NamedTemporaryFile(mode='w', suffix='.java', delete=False) as f:
        f.write(test_content)
        test_file = f.name

    try:
        output_file = test_file.replace('.java', '.ipynb')
        jupyterize(test_file, output_file, verbose=False)

        with open(output_file) as f:
            nb = json.load(f)
        sources = [cell_text(cell) for cell in nb['cells']]

        # No wrapper scaffolding survives anywhere.
        all_src = '\n'.join(sources)
        assert 'public class' not in all_src
        assert 'public void run' not in all_src

        # The balanced for-loop keeps its own closing brace.
        loop_src = next(src for src in sources if 'for (int i' in src)
        assert loop_src.count('{') == loop_src.count('}'), \
            f"balanced loop braces altered: {loop_src!r}"

        # The teardown cell keeps jedis.close() but loses the orphan wrapper '}'.
        close_src = next(src for src in sources if 'jedis.close()' in src).rstrip()
        assert close_src.endswith('jedis.close();'), \
            f"orphan braces not stripped: {close_src!r}"

        # No kept cell is left brace-positive (orphan trailing closes).
        for src in sources:
            assert src.count('}') <= src.count('{'), \
                f"cell has orphan closing braces: {src!r}"

        print("✓ Orphan trailing-brace removal test passed")

    finally:
        if os.path.exists(test_file):
            os.unlink(test_file)
        if os.path.exists(output_file):
            os.unlink(output_file)
def test_orphan_braces_ignore_strings():
    """Braces inside string/char literals or comments must not affect
    orphan-brace removal."""
    print("\nTesting orphan-brace removal ignores string-literal braces...")

    from unwrapper import _net_braces, _strip_trailing_orphan_braces

    expected_net = [
        # A '}' in a string/char/comment is not a structural brace.
        ('System.out.println("}");', 0),
        ("char c = '}';", 0),
        ('x = 1; // closes the } block', 0),
        # Real structural imbalance is still counted.
        ('foo();\n}\n}', 2),
    ]
    for snippet, net in expected_net:
        assert _net_braces(snippet) == net

    # A balanced loop whose body prints a '}' keeps its own closing brace.
    balanced = 'for (int i = 0; i < 2; i++) {\n print("}");\n}'
    assert _strip_trailing_orphan_braces(balanced) == balanced

    # Genuine orphan wrapper closes (with a string brace earlier) are stripped,
    # but only the unmatched ones.
    orphan = 'print("}");\njedis.close();\n}\n}'
    stripped = _strip_trailing_orphan_braces(orphan)
    assert stripped == 'print("}");\njedis.close();'

    print("✓ Orphan-brace string-literal test passed")
a JSON literal) doesn't skew + the balance and cause a real closing brace to be stripped. + """ + net = 0 + i, n = 0, len(code) + quote = None + while i < n: + ch = code[i] + if quote: + if ch == '\\' and quote != '`': + i += 2 + continue + if ch == quote: + quote = None + i += 1 + continue + if ch in ('"', "'", '`'): + quote = ch + elif ch == '#' or (ch == '/' and i + 1 < n and code[i + 1] == '/'): + while i < n and code[i] != '\n': # skip to end of line comment + i += 1 + continue + elif ch == '{': + net -= 1 + elif ch == '}': + net += 1 + i += 1 + return net + + +def _strip_trailing_orphan_braces(code): + """ + Strip orphan closing braces left when a class/method wrapper's opening was + removed from an earlier cell. + + Only CONTIGUOUS trailing lone-'}' lines are removed (stopping at the first + real content line), and at most as many as the cell has unmatched closes + (counted by _net_braces, which ignores braces in strings/comments). This + preserves the closing braces of balanced blocks (for/foreach/lambda bodies) + that legitimately sit inside the cell. + """ + net = _net_braces(code) + if net <= 0: + return code + + lines = code.split('\n') + while net > 0 and lines: + if lines[-1].strip() == '': + lines.pop() # drop trailing blank lines + elif re.match(r'^\s*\}\s*$', lines[-1]): + lines.pop() # drop an orphan closing brace + net -= 1 + else: + break # hit real content; stop + return '\n'.join(lines) + + class CodeUnwrapper: """Removes language-specific structural wrappers from code.""" @@ -223,5 +284,12 @@ def unwrap(self, code): logging.debug(f"Removing {braces_removed} trailing closing braces") code = _remove_trailing_braces(code, braces_removed) + # Strip any remaining orphan trailing closing braces. A class/method + # wrapper spans cells (opening braces in the first cell, closing braces + # in the last), so per-cell removal above leaves the trailing closes + # behind. 
# Kernel-less driver: exec each code cell in a shared namespace (same ordering
# and shared-state semantics a Jupyter kernel gives), capturing per-cell stdout
# and errors into the same executed-notebook shape report() expects. Used by
# --mode script, which avoids the Jupyter kernel entirely (the kernel's zmq
# handshake hangs under amd64 emulation on Apple Silicon; plain python is fine).
_DRIVER = r'''
import json, sys, io, contextlib, traceback
nb = json.load(open(sys.argv[1]))
ns = {}
for c in nb["cells"]:
    if c["cell_type"] != "code":
        continue
    src = c["source"]
    src = "".join(src) if isinstance(src, list) else src
    buf = io.StringIO(); c["outputs"] = []
    try:
        with contextlib.redirect_stdout(buf):
            exec(compile(src, "", "exec"), ns)
    except Exception as e:
        if buf.getvalue():
            c["outputs"].append({"output_type": "stream", "name": "stdout", "text": buf.getvalue()})
        c["outputs"].append({"output_type": "error", "ename": type(e).__name__,
                             "evalue": str(e), "traceback": traceback.format_exc().splitlines()})
        continue
    if buf.getvalue():
        c["outputs"].append({"output_type": "stream", "name": "stdout", "text": buf.getvalue()})
json.dump(nb, open(sys.argv[2], "w"))
'''

# Shell command for --mode kernel: execute through nbconvert, keeping going
# after cell errors so every failing cell is recorded in the output notebook.
_KERNEL_CMD = (
    "jupyter nbconvert --to notebook --execute --allow-errors "
    "--ExecutePreprocessor.startup_timeout=300 --ExecutePreprocessor.timeout=300 "
    "--output executed.ipynb test.ipynb >/dev/null 2>&1"
)
# Shell command for --mode script: run the kernel-less driver above.
_SCRIPT_CMD = "python /work/driver.py /work/test.ipynb /work/executed.ipynb"

# Shell prefix that starts the image's Redis server in the background and then
# moves to the mounted work directory; the execution command is appended to it.
_START_REDIS = (
    "cd /usr/src/redis-src && ./redis-server ./redis.conf --daemonize yes "
    ">/dev/null 2>&1 && sleep 1 && cd /work && "
)


def execute_in_image(notebook, image, mode):
    """Run `notebook` inside `image` and return the executed notebook dict.

    The notebook is written to a temporary directory that is mounted at /work
    in the container; `mode` ("script" or anything else, meaning kernel)
    selects which execution command runs after Redis is started. Raises
    subprocess.CalledProcessError when the container exits non-zero.
    """
    use_script = mode == "script"
    shell_script = _START_REDIS + (_SCRIPT_CMD if use_script else _KERNEL_CMD)

    with tempfile.TemporaryDirectory() as workdir:
        input_path = os.path.join(workdir, "test.ipynb")
        with open(input_path, "w", encoding="utf-8") as handle:
            json.dump(notebook, handle)

        if use_script:
            # The driver is only needed (and only written) in script mode.
            driver_path = os.path.join(workdir, "driver.py")
            with open(driver_path, "w", encoding="utf-8") as handle:
                handle.write(_DRIVER)

        docker_cmd = [
            "docker", "run", "--rm", "--platform", "linux/amd64",
            "-v", f"{workdir}:/work", image, "bash", "-c", shell_script,
        ]
        subprocess.run(docker_cmd, check=True)

        output_path = os.path.join(workdir, "executed.ipynb")
        with open(output_path, encoding="utf-8") as handle:
            return json.load(handle)
def report(executed):
    """Inspect executed cells; return (ok, failures). Print per-cell summary.

    `executed` is an executed-notebook dict. Only code cells are examined. A
    cell fails when any of its outputs has output_type "error"; each failure is
    recorded as (cell index, label, ename, evalue). The label is "TEST" for
    cells tagged 'test', otherwise the cell's 'step' metadata or "setup".

    Cells without a 'metadata' key and error outputs without 'ename' are
    tolerated rather than raising KeyError, so one malformed cell cannot hide
    the results of the others.
    """
    failures = []
    for i, cell in enumerate(executed["cells"]):
        if cell["cell_type"] != "code":
            continue
        metadata = cell.get("metadata") or {}
        is_test = "test" in metadata.get("tags", [])
        label = "TEST" if is_test else (metadata.get("step") or "setup")
        outputs = cell.get("outputs", [])

        err = next((o for o in outputs if o.get("output_type") == "error"), None)
        if err:
            ename = err.get("ename", "Error")
            evalue = err.get("evalue", "")
            failures.append((i, label, ename, evalue))
            print(f" ✗ cell {i:>2} [{label}] -> {ename}: {evalue}")
            continue

        # Stream text may be a single string or a list of lines; join handles both.
        stdout = "".join(
            "".join(o.get("text", "")) for o in outputs
            if o.get("output_type") == "stream"
        ).strip()
        mark = "·" if is_test else "✓"
        # Test cells only assert, so their output is not echoed.
        extra = f" stdout: {stdout!r}" if stdout and not is_test else ""
        print(f" {mark} cell {i:>2} [{label}]{extra}")
    return (not failures), failures
def main():
    """Parse the command line, execute the notebook, and report the result.

    Returns the process exit status:
      0 - every code cell ran without an error output (RESULT: PASS)
      1 - at least one cell produced an error output (RESULT: FAIL)
      2 - the notebook could not be executed at all (Docker missing, the
          container exited non-zero, or no executed notebook was produced)
    """
    ap = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--notebook", metavar="PATH", required=True,
                    help="the .ipynb to execute and verify (e.g. jupyterize's "
                         "--with-tests output)")
    ap.add_argument("--image", required=True,
                    help="base image to run the notebook in (the launcher "
                         "branch's Dockerfile FROM)")
    ap.add_argument("--mode", choices=["kernel", "script"], default="kernel",
                    help="kernel: real nbconvert (CI/amd64). script: kernel-less "
                         "Python exec (local/Apple Silicon; Python notebooks only).")
    args = ap.parse_args()

    with open(args.notebook, encoding="utf-8") as f:
        test_nb = json.load(f)

    # Show only the short image name: drop any @sha256 digest and registry path.
    img_name = args.image.split('@')[0].split('/')[-1]
    print(f"Verifying {args.notebook} in {img_name} (mode={args.mode}) ...")

    try:
        executed = execute_in_image(test_nb, args.image, args.mode)
    except (subprocess.CalledProcessError, OSError) as exc:
        # An infrastructure failure is not an assertion failure: say so plainly
        # instead of dumping a traceback, and use a distinct exit status.
        print(f"ERROR: could not execute notebook in container: {exc}",
              file=sys.stderr)
        print()
        print("RESULT: FAIL")
        return 2

    ok, _ = report(executed)
    print()
    print("RESULT: PASS" if ok else "RESULT: FAIL")
    return 0 if ok else 1
# EXAMPLE: time_series_tutorial
# BINDER_ID python-dt-timeseries
# HIDE_START
"""
Code samples for time series page:
    https://redis.io/docs/latest/develop/data-types/timeseries/
"""

import redis

r = redis.Redis(decode_responses=True)
# HIDE_END

# REMOVE_START
r.delete(
    "thermometer:1", "thermometer:2", "thermometer:3",
    "rg:1", "rg:2", "rg:3", "rg:4",
    "sensor3",
    "wind:1", "wind:2", "wind:3", "wind:4",
    "hyg:1", "hyg:compacted"
)
# REMOVE_END

# STEP_START create
res1 = r.ts().create("thermometer:1")
print(res1)  # >>> True

res2 = r.type("thermometer:1")
print(res2)  # >>> TSDB-TYPE

res3 = r.ts().info("thermometer:1")
print(res3)
# >>> {'rules': [], ... 'total_samples': 0, ...
# STEP_END
# REMOVE_START
assert res1 is True
assert res2 == "TSDB-TYPE"
assert res3["total_samples"] == 0
# REMOVE_END

# STEP_START create_retention
res4 = r.ts().add("thermometer:2", 1, 10.8, retention_msecs=100)
print(res4)  # >>> 1

res5 = r.ts().info("thermometer:2")
print(res5)
# >>> {'rules': [], ... 'retention_msecs': 100, ...
# STEP_END
# REMOVE_START
assert res4 == 1
assert res5["retention_msecs"] == 100
# REMOVE_END

# STEP_START create_labels
res6 = r.ts().add(
    "thermometer:3", 1, 10.4,
    labels={"location": "UK", "type": "Mercury"}
)
print(res6)  # >>> 1

res7 = r.ts().info("thermometer:3")
print(res7)
# >>> {'rules': [], ... 'labels': {'location': 'UK', 'type': 'Mercury'}, ...
# STEP_END
# REMOVE_START
assert res6 == 1
assert res7["labels"] == {"location": "UK", "type": "Mercury"}
# REMOVE_END

# STEP_START madd
res8 = r.ts().madd([
    ("thermometer:1", 1, 9.2),
    ("thermometer:1", 2, 9.9),
    ("thermometer:2", 2, 10.3)
])
print(res8)  # >>> [1, 2, 2]
# STEP_END
# REMOVE_START
assert res8 == [1, 2, 2]
# REMOVE_END

# STEP_START get
# The last recorded temperature for thermometer:2
# was 10.3 at time 2.
res9 = r.ts().get("thermometer:2")
print(res9)  # >>> (2, 10.3)
# STEP_END
# REMOVE_START
assert res9 == (2, 10.3)
# REMOVE_END

# STEP_START range
# Add 5 data points to a time series named "rg:1".
res10 = r.ts().create("rg:1")
print(res10)  # >>> True

res11 = r.ts().madd([
    ("rg:1", 0, 18),
    ("rg:1", 1, 14),
    ("rg:1", 2, 22),
    ("rg:1", 3, 18),
    ("rg:1", 4, 24),
])
print(res11)  # >>> [0, 1, 2, 3, 4]

# Retrieve all the data points in ascending order.
res12 = r.ts().range("rg:1", "-", "+")
print(res12)  # >>> [(0, 18.0), (1, 14.0), (2, 22.0), (3, 18.0), (4, 24.0)]

# Retrieve data points up to time 1 (inclusive).
res13 = r.ts().range("rg:1", "-", 1)
print(res13)  # >>> [(0, 18.0), (1, 14.0)]

# Retrieve data points from time 3 onwards.
res14 = r.ts().range("rg:1", 3, "+")
print(res14)  # >>> [(3, 18.0), (4, 24.0)]

# Retrieve all the data points in descending order.
res15 = r.ts().revrange("rg:1", "-", "+")
print(res15)  # >>> [(4, 24.0), (3, 18.0), (2, 22.0), (1, 14.0), (0, 18.0)]

# Retrieve data points up to time 1 (inclusive), but return them
# in descending order.
res16 = r.ts().revrange("rg:1", "-", 1)
print(res16)  # >>> [(1, 14.0), (0, 18.0)]
# STEP_END
# REMOVE_START
assert res10 is True
assert res11 == [0, 1, 2, 3, 4]
assert res12 == [(0, 18.0), (1, 14.0), (2, 22.0), (3, 18.0), (4, 24.0)]
assert res13 == [(0, 18.0), (1, 14.0)]
assert res14 == [(3, 18.0), (4, 24.0)]
assert res15 == [(4, 24.0), (3, 18.0), (2, 22.0), (1, 14.0), (0, 18.0)]
assert res16 == [(1, 14.0), (0, 18.0)]
# REMOVE_END

# STEP_START range_filter
res17 = r.ts().range("rg:1", "-", "+", filter_by_ts=[0, 2, 4])
print(res17)  # >>> [(0, 18.0), (2, 22.0), (4, 24.0)]

res18 = r.ts().revrange(
    "rg:1", "-", "+",
    filter_by_ts=[0, 2, 4],
    filter_by_min_value=20,
    filter_by_max_value=25,
)
print(res18)  # >>> [(4, 24.0), (2, 22.0)]

res19 = r.ts().revrange(
    "rg:1", "-", "+",
    filter_by_ts=[0, 2, 4],
    filter_by_min_value=22,
    filter_by_max_value=22,
    count=1,
)
print(res19)  # >>> [(2, 22.0)]
# STEP_END
# REMOVE_START
assert res17 == [(0, 18.0), (2, 22.0), (4, 24.0)]
assert res18 == [(4, 24.0), (2, 22.0)]
assert res19 == [(2, 22.0)]
# REMOVE_END

# STEP_START query_multi
# Create three new "rg:" time series (two in the US
# and one in the UK, with different units) and add some
# data points.
res20 = r.ts().create(
    "rg:2",
    labels={"location": "us", "unit": "cm"},
)
print(res20)  # >>> True

res21 = r.ts().create(
    "rg:3",
    labels={"location": "us", "unit": "in"},
)
print(res21)  # >>> True

res22 = r.ts().create(
    "rg:4",
    labels={"location": "uk", "unit": "mm"},
)
print(res22)  # >>> True

res23 = r.ts().madd([
    ("rg:2", 0, 1.8),
    ("rg:3", 0, 0.9),
    ("rg:4", 0, 25),
])
print(res23)  # >>> [0, 0, 0]

res24 = r.ts().madd([
    ("rg:2", 1, 2.1),
    ("rg:3", 1, 0.77),
    ("rg:4", 1, 18),
])
print(res24)  # >>> [1, 1, 1]

res25 = r.ts().madd([
    ("rg:2", 2, 2.3),
    ("rg:3", 2, 1.1),
    ("rg:4", 2, 21),
])
print(res25)  # >>> [2, 2, 2]

res26 = r.ts().madd([
    ("rg:2", 3, 1.9),
    ("rg:3", 3, 0.81),
    ("rg:4", 3, 19),
])
print(res26)  # >>> [3, 3, 3]

res27 = r.ts().madd([
    ("rg:2", 4, 1.78),
    ("rg:3", 4, 0.74),
    ("rg:4", 4, 23),
])
print(res27)  # >>> [4, 4, 4]

# Retrieve the last data point from each US time series. If
# you don't specify any labels, an empty array is returned
# for the labels.
res28 = r.ts().mget(["location=us"])
print(res28)  # >>> [{'rg:2': [{}, 4, 1.78]}, {'rg:3': [{}, 4, 0.74]}]

# Retrieve the same data points, but include the `unit`
# label in the results.
res29 = r.ts().mget(["location=us"], select_labels=["unit"])
print(res29)
# >>> [{'rg:2': [{'unit': 'cm'}, 4, 1.78]}, {'rg:3': [{'unit': 'in'}, 4, 0.74]}]

# Retrieve data points up to time 2 (inclusive) from all
# time series that use millimeters as the unit. Include all
# labels in the results.
res30 = r.ts().mrange(
    "-", 2, filters=["unit=mm"], with_labels=True
)
print(res30)
# >>> [{'rg:4': [{'location': 'uk', 'unit': 'mm'}, [(0, 25.0),...

# Retrieve data points from time 1 to time 3 (inclusive) from
# all time series that use centimeters or millimeters as the unit,
# but only return the `location` label. Return the results
# in descending order of timestamp.
res31 = r.ts().mrevrange(
    1, 3, filters=["unit=(cm,mm)"], select_labels=["location"]
)
print(res31)
# >>> [{'rg:2': [{'location': 'us'}, [(3, 1.9), (2, 2.3), (1, 2.1)]]},...
# STEP_END
# REMOVE_START
assert res20 is True
assert res21 is True
assert res22 is True
assert res23 == [0, 0, 0]
assert res24 == [1, 1, 1]
assert res25 == [2, 2, 2]
assert res26 == [3, 3, 3]
assert res27 == [4, 4, 4]
assert res28 == [{'rg:2': [{}, 4, 1.78]}, {'rg:3': [{}, 4, 0.74]}]
assert res29 == [
    {'rg:2': [{'unit': 'cm'}, 4, 1.78]},
    {'rg:3': [{'unit': 'in'}, 4, 0.74]}
]
assert res30 == [
    {
        'rg:4': [
            {'location': 'uk', 'unit': 'mm'},
            [(0, 25), (1, 18.0), (2, 21.0)]
        ]
    }
]
assert res31 == [
    {'rg:2': [{'location': 'us'}, [(3, 1.9), (2, 2.3), (1, 2.1)]]},
    {'rg:4': [{'location': 'uk'}, [(3, 19.0), (2, 21.0), (1, 18.0)]]}
]
# REMOVE_END

# STEP_START agg
res32 = r.ts().range(
    "rg:2", "-", "+",
    aggregation_type="avg",
    bucket_size_msec=2
)
print(res32)
# >>> [(0, 1.9500000000000002), (2, 2.0999999999999996), (4, 1.78)]
# STEP_END
# REMOVE_START
assert res32 == [
    (0, 1.9500000000000002), (2, 2.0999999999999996),
    (4, 1.78)
]
# REMOVE_END

# STEP_START agg_bucket
res33 = r.ts().create("sensor3")
print(res33)  # >>> True

res34 = r.ts().madd([
    ("sensor3", 10, 1000),
    ("sensor3", 20, 2000),
    ("sensor3", 30, 3000),
    ("sensor3", 40, 4000),
    ("sensor3", 50, 5000),
    ("sensor3", 60, 6000),
    ("sensor3", 70, 7000),
])
print(res34)  # >>> [10, 20, 30, 40, 50, 60, 70]

res35 = r.ts().range(
    "sensor3", 10, 70,
    aggregation_type="min",
    bucket_size_msec=25
)
print(res35)
# >>> [(0, 1000.0), (25, 3000.0), (50, 5000.0)]
# STEP_END
# REMOVE_START
assert res33 is True
assert res34 == [10, 20, 30, 40, 50, 60, 70]
assert res35 == [(0, 1000.0), (25, 3000.0), (50, 5000.0)]
# REMOVE_END

# STEP_START agg_align
res36 = r.ts().range(
    "sensor3", 10, 70,
    aggregation_type="min",
    bucket_size_msec=25,
    align="START"
)
print(res36)
# >>> [(10, 1000.0), (35, 4000.0), (60, 6000.0)]
# STEP_END
# REMOVE_START
assert res36 == [(10, 1000.0), (35, 4000.0), (60, 6000.0)]
# REMOVE_END

# STEP_START agg_multi
res37 = r.ts().create(
    "wind:1",
    labels={"country": "uk"}
)
print(res37)  # >>> True

res38 = r.ts().create(
    "wind:2",
    labels={"country": "uk"}
)
print(res38)  # >>> True

res39 = r.ts().create(
    "wind:3",
    labels={"country": "us"}
)
print(res39)  # >>> True

res40 = r.ts().create(
    "wind:4",
    labels={"country": "us"}
)
print(res40)  # >>> True

res41 = r.ts().madd([
    ("wind:1", 1, 12),
    ("wind:2", 1, 18),
    ("wind:3", 1, 5),
    ("wind:4", 1, 20),
])
print(res41)  # >>> [1, 1, 1, 1]

res42 = r.ts().madd([
    ("wind:1", 2, 14),
    ("wind:2", 2, 21),
    ("wind:3", 2, 4),
    ("wind:4", 2, 25),
])
print(res42)  # >>> [2, 2, 2, 2]

res43 = r.ts().madd([
    ("wind:1", 3, 10),
    ("wind:2", 3, 24),
    ("wind:3", 3, 8),
    ("wind:4", 3, 18),
])
print(res43)  # >>> [3, 3, 3, 3]

# The result pairs contain the timestamp and the maximum sample value
# for the country at that timestamp.
res44 = r.ts().mrange(
    "-", "+",
    filters=["country=(us,uk)"],
    groupby="country",
    reduce="max"
)
print(res44)
# >>> [{'country=uk': [{}, [(1, 18.0), (2, 21.0), (3, 24.0)]]}, ...

# The result pairs contain the timestamp and the average sample value
# for the country at that timestamp.
res45 = r.ts().mrange(
    "-", "+",
    filters=["country=(us,uk)"],
    groupby="country",
    reduce="avg"
)
print(res45)
# >>> [{'country=uk': [{}, [(1, 15.0), (2, 17.5), (3, 17.0)]]}, ...
# STEP_END
# REMOVE_START
assert res37 is True
assert res38 is True
assert res39 is True
assert res40 is True
assert res41 == [1, 1, 1, 1]
assert res42 == [2, 2, 2, 2]
assert res43 == [3, 3, 3, 3]
assert res44 == [
    {'country=uk': [{}, [(1, 18.0), (2, 21.0), (3, 24.0)]]},
    {'country=us': [{}, [(1, 20.0), (2, 25.0), (3, 18.0)]]}
]
assert res45 == [
    {'country=uk': [{}, [(1, 15.0), (2, 17.5), (3, 17.0)]]},
    {'country=us': [{}, [(1, 12.5), (2, 14.5), (3, 13.0)]]}
]
# REMOVE_END

# STEP_START create_compaction
res45 = r.ts().create("hyg:1")
print(res45)  # >>> True

res46 = r.ts().create("hyg:compacted")
print(res46)  # >>> True

res47 = r.ts().createrule("hyg:1", "hyg:compacted", "min", 3)
print(res47)  # >>> True

res48 = r.ts().info("hyg:1")
print(res48.rules)
# >>> [['hyg:compacted', 3, 'MIN', 0]]

res49 = r.ts().info("hyg:compacted")
print(res49.source_key)  # >>> hyg:1
# STEP_END
# REMOVE_START
assert res45 is True
assert res46 is True
assert res47 is True
assert res48.rules == [['hyg:compacted', 3, 'MIN', 0]]
assert res49.source_key == 'hyg:1'
# REMOVE_END

# STEP_START comp_add
res50 = r.ts().madd([
    ("hyg:1", 0, 75),
    ("hyg:1", 1, 77),
    ("hyg:1", 2, 78),
])
print(res50)  # >>> [0, 1, 2]

res51 = r.ts().range("hyg:compacted", "-", "+")
print(res51)  # >>> []

res52 = r.ts().add("hyg:1", 3, 79)
print(res52)  # >>> 3

res53 = r.ts().range("hyg:compacted", "-", "+")
print(res53)  # >>> [(0, 75.0)]
# STEP_END
# REMOVE_START
assert res50 == [0, 1, 2]
assert res51 == []
assert res52 == 3
assert res53 == [(0, 75.0)]
# REMOVE_END

# STEP_START del
res54 = r.ts().info("thermometer:1")
print(res54.total_samples)  # >>> 2
print(res54.first_timestamp)  # >>> 1
print(res54.last_timestamp)  # >>> 2

res55 = r.ts().add("thermometer:1", 3, 9.7)
print(res55)  # >>> 3

res56 = r.ts().info("thermometer:1")
print(res56.total_samples)  # >>> 3
print(res56.first_timestamp)  # >>> 1
print(res56.last_timestamp)  # >>> 3

res57 = r.ts().delete("thermometer:1", 1, 2)
print(res57)  # >>> 2

res58 = r.ts().info("thermometer:1")
print(res58.total_samples)  # >>> 1
print(res58.first_timestamp)  # >>> 3
print(res58.last_timestamp)  # >>> 3

res59 = r.ts().delete("thermometer:1", 3, 3)
print(res59)  # >>> 1

res60 = r.ts().info("thermometer:1")
print(res60.total_samples)  # >>> 0
# STEP_END
# REMOVE_START
assert res54.total_samples == 2
assert res54.first_timestamp == 1
assert res54.last_timestamp == 2
assert res55 == 3
assert res56.total_samples == 3
assert res56.first_timestamp == 1
assert res56.last_timestamp == 3
assert res57 == 2
assert res58.total_samples == 1
assert res58.first_timestamp == 3
assert res58.last_timestamp == 3
assert res59 == 1
assert res60.total_samples == 0
# REMOVE_END
# EXAMPLE: vecset_tutorial
# BINDER_ID python-dt-vector-set
# HIDE_START
"""
Code samples for Vector set doc pages:
    https://redis.io/docs/latest/develop/data-types/vector-sets/
"""

import redis

from redis.commands.vectorset.commands import (
    QuantizationOptions
)

r = redis.Redis(decode_responses=True)
# HIDE_END

# REMOVE_START
r.delete(
    "points", "quantSetQ8", "quantSetNoQ",
    "quantSetBin", "setNotReduced", "setReduced"
)
# REMOVE_END

# STEP_START vadd
res1 = r.vset().vadd("points", [1.0, 1.0], "pt:A")
print(res1)  # >>> 1

res2 = r.vset().vadd("points", [-1.0, -1.0], "pt:B")
print(res2)  # >>> 1

res3 = r.vset().vadd("points", [-1.0, 1.0], "pt:C")
print(res3)  # >>> 1

res4 = r.vset().vadd("points", [1.0, -1.0], "pt:D")
print(res4)  # >>> 1

res5 = r.vset().vadd("points", [1.0, 0], "pt:E")
print(res5)  # >>> 1

res6 = r.type("points")
print(res6)  # >>> vectorset
# STEP_END
# REMOVE_START
assert res1 == 1
assert res2 == 1
assert res3 == 1
assert res4 == 1
assert res5 == 1

assert res6 == "vectorset"
# REMOVE_END

# STEP_START vcardvdim
res7 = r.vset().vcard("points")
print(res7)  # >>> 5

res8 = r.vset().vdim("points")
print(res8)  # >>> 2
# STEP_END
# REMOVE_START
assert res7 == 5
assert res8 == 2
# REMOVE_END

# STEP_START vemb
res9 = r.vset().vemb("points", "pt:A")
print(res9)  # >>> [0.9999999403953552, 0.9999999403953552]

res10 = r.vset().vemb("points", "pt:B")
print(res10)  # >>> [-0.9999999403953552, -0.9999999403953552]

res11 = r.vset().vemb("points", "pt:C")
print(res11)  # >>> [-0.9999999403953552, 0.9999999403953552]

res12 = r.vset().vemb("points", "pt:D")
print(res12)  # >>> [0.9999999403953552, -0.9999999403953552]

res13 = r.vset().vemb("points", "pt:E")
print(res13)  # >>> [1, 0]
# STEP_END
# REMOVE_START
# Two-sided tolerances: without abs() a value that is far too large would
# make the difference negative and pass the check.
assert abs(1 - res9[0]) < 0.001
assert abs(1 - res9[1]) < 0.001
assert abs(1 + res10[0]) < 0.001
assert abs(1 + res10[1]) < 0.001
assert abs(1 + res11[0]) < 0.001
assert abs(1 - res11[1]) < 0.001
assert abs(1 - res12[0]) < 0.001
assert abs(1 + res12[1]) < 0.001
assert res13 == [1, 0]
# REMOVE_END

# STEP_START attr
res14 = r.vset().vsetattr("points", "pt:A", {
    "name": "Point A",
    "description": "First point added"
})
print(res14)  # >>> 1

res15 = r.vset().vgetattr("points", "pt:A")
print(res15)
# >>> {'name': 'Point A', 'description': 'First point added'}

res16 = r.vset().vsetattr("points", "pt:A", "")
print(res16)  # >>> 1

res17 = r.vset().vgetattr("points", "pt:A")
print(res17)  # >>> None
# STEP_END
# REMOVE_START
assert res14 == 1
assert res15 == {"name": "Point A", "description": "First point added"}
assert res16 == 1
assert res17 is None
# REMOVE_END

# STEP_START vrem
res18 = r.vset().vadd("points", [0, 0], "pt:F")
print(res18)  # >>> 1

res19 = r.vset().vcard("points")
print(res19)  # >>> 6

res20 = r.vset().vrem("points", "pt:F")
print(res20)  # >>> 1

res21 = r.vset().vcard("points")
print(res21)  # >>> 5
# STEP_END
# REMOVE_START
assert res18 == 1
assert res19 == 6
assert res20 == 1
assert res21 == 5
# REMOVE_END

# STEP_START vsim_basic
res22 = r.vset().vsim("points", [0.9, 0.1])
print(res22)
# >>> ['pt:E', 'pt:A', 'pt:D', 'pt:C', 'pt:B']
# STEP_END
# REMOVE_START
assert res22 == ["pt:E", "pt:A", "pt:D", "pt:C", "pt:B"]
# REMOVE_END

# STEP_START vsim_options
res23 = r.vset().vsim(
    "points", "pt:A",
    with_scores=True,
    count=4
)
print(res23)
# >>> {'pt:A': 1.0, 'pt:E': 0.8535534143447876, 'pt:D': 0.5, 'pt:C': 0.5}
# STEP_END
# REMOVE_START
assert res23["pt:A"] == 1.0
assert res23["pt:C"] == 0.5
assert res23["pt:D"] == 0.5
assert abs(res23["pt:E"] - 0.85) < 0.005
# REMOVE_END

# STEP_START vsim_filter
res24 = r.vset().vsetattr("points", "pt:A", {
    "size": "large",
    "price": 18.99
})
print(res24)  # >>> 1

res25 = r.vset().vsetattr("points", "pt:B", {
    "size": "large",
    "price": 35.99
})
print(res25)  # >>> 1

res26 = r.vset().vsetattr("points", "pt:C", {
    "size": "large",
    "price": 25.99
})
print(res26)  # >>> 1

res27 = r.vset().vsetattr("points", "pt:D", {
    "size": "small",
    "price": 21.00
})
print(res27)  # >>> 1

res28 = r.vset().vsetattr("points", "pt:E", {
    "size": "small",
    "price": 17.75
})
print(res28)  # >>> 1

# Return elements in order of distance from point A whose
# `size` attribute is `large`.
res29 = r.vset().vsim(
    "points", "pt:A",
    filter='.size == "large"'
)
print(res29)  # >>> ['pt:A', 'pt:C', 'pt:B']

# Return elements in order of distance from point A whose size is
# `large` and whose price is greater than 20.00.
res30 = r.vset().vsim(
    "points", "pt:A",
    filter='.size == "large" && .price > 20.00'
)
print(res30)  # >>> ['pt:C', 'pt:B']
# STEP_END
# REMOVE_START
assert res24 == 1
assert res25 == 1
assert res26 == 1
assert res27 == 1
assert res28 == 1

assert res29 == ['pt:A', 'pt:C', 'pt:B']
assert res30 == ['pt:C', 'pt:B']
# REMOVE_END

# STEP_START add_quant
# Import `QuantizationOptions` enum using:
#
# from redis.commands.vectorset.commands import (
#     QuantizationOptions
# )
res31 = r.vset().vadd(
    "quantSetQ8", [1.262185, 1.958231],
    "quantElement",
    quantization=QuantizationOptions.Q8
)
print(res31)  # >>> 1

res32 = r.vset().vemb("quantSetQ8", "quantElement")
print(f"Q8: {res32}")
# >>> Q8: [1.2643694877624512, 1.958230972290039]

res33 = r.vset().vadd(
    "quantSetNoQ", [1.262185, 1.958231],
    "quantElement",
    quantization=QuantizationOptions.NOQUANT
)
print(res33)  # >>> 1

res34 = r.vset().vemb("quantSetNoQ", "quantElement")
print(f"NOQUANT: {res34}")
# >>> NOQUANT: [1.262184977531433, 1.958230972290039]

res35 = r.vset().vadd(
    "quantSetBin", [1.262185, 1.958231],
    "quantElement",
    quantization=QuantizationOptions.BIN
)
print(res35)  # >>> 1

res36 = r.vset().vemb("quantSetBin", "quantElement")
print(f"BIN: {res36}")
# >>> BIN: [1, 1]
# STEP_END
# REMOVE_START
assert res31 == 1
assert res33 == 1
assert res35 == 1
# REMOVE_END

# STEP_START add_reduce
# Create a list of 300 arbitrary values.
values = [x / 299 for x in range(300)]

res37 = r.vset().vadd(
    "setNotReduced",
    values,
    "element"
)
print(res37)  # >>> 1

res38 = r.vset().vdim("setNotReduced")
print(res38)  # >>> 300

res39 = r.vset().vadd(
    "setReduced",
    values,
    "element",
    reduce_dim=100
)
print(res39)  # >>> 1

res40 = r.vset().vdim("setReduced")
print(res40)  # >>> 100
# STEP_END
# REMOVE_START
assert res37 == 1
assert res38 == 300
assert res39 == 1
assert res40 == 100
# REMOVE_END