From 0ca2f7e76cf066bd8bf23d1d45e00e325fb1cd61 Mon Sep 17 00:00:00 2001 From: Ben Kaye Date: Tue, 23 Jun 2026 13:26:37 +0100 Subject: [PATCH 1/2] Add Python bindings Cython extension wrapping the lite3 C library, with packaging (setup.py/pyproject.toml), tests, and README. --- bindings/python/.gitignore | 8 + bindings/python/MANIFEST.in | 3 + bindings/python/README.md | 184 ++++++++++ bindings/python/lite3/__init__.py | 31 ++ bindings/python/lite3/_core.pyx | 452 ++++++++++++++++++++++++ bindings/python/pyproject.toml | 15 + bindings/python/setup.py | 43 +++ bindings/python/src/lite3_shim.c | 106 ++++++ bindings/python/src/lite3_shim.h | 90 +++++ bindings/python/tests/test_roundtrip.py | 86 +++++ bindings/python/tests/test_writes.py | 130 +++++++ 11 files changed, 1148 insertions(+) create mode 100644 bindings/python/.gitignore create mode 100644 bindings/python/MANIFEST.in create mode 100644 bindings/python/README.md create mode 100644 bindings/python/lite3/__init__.py create mode 100644 bindings/python/lite3/_core.pyx create mode 100644 bindings/python/pyproject.toml create mode 100644 bindings/python/setup.py create mode 100644 bindings/python/src/lite3_shim.c create mode 100644 bindings/python/src/lite3_shim.h create mode 100644 bindings/python/tests/test_roundtrip.py create mode 100644 bindings/python/tests/test_writes.py diff --git a/bindings/python/.gitignore b/bindings/python/.gitignore new file mode 100644 index 0000000..4197bc8 --- /dev/null +++ b/bindings/python/.gitignore @@ -0,0 +1,8 @@ +.venv/ +build/ +*.o +*.so +*.egg-info/ +__pycache__/ +lite3/_core.c +_vendor/ diff --git a/bindings/python/MANIFEST.in b/bindings/python/MANIFEST.in new file mode 100644 index 0000000..d8077b3 --- /dev/null +++ b/bindings/python/MANIFEST.in @@ -0,0 +1,3 @@ +recursive-include _vendor *.c *.h LICENSE +recursive-include src *.c *.h +include lite3/_core.pyx diff --git a/bindings/python/README.md b/bindings/python/README.md new file mode 100644 index 0000000..3190593 --- 
/dev/null +++ b/bindings/python/README.md @@ -0,0 +1,184 @@ +# lite3 — Python binding + +Zero-copy reads over the Lite³ wire format, plus typed writes. Build or receive a +message, then index it like a `dict`/`list` — fields are read straight out of the +serialized buffer on demand (lazy proxies, never a hydrated copy). + +## Install + +Builds the C core (lite3 + bundled yyjson) as a Cython extension: + +```sh +cd bindings/python +python3 -m venv .venv # optional, but recommended +.venv/bin/pip install . # or `-e .` for an editable dev install +``` + +Requires a C compiler, Python ≥ 3.11, and Cython ≥ 3 (pulled in by the build). +The build compiles the lite3 sources directly and does **not** use the repo +`Makefile`. + +At build time the C core (`src/`, `lib/`, `include/` from the repo root) is +copied into a local `_vendor/` so the package is self-contained — `pip install .`, +`sdist`, and `cibuildwheel` all work without reaching outside the package. The +repo root stays the single source of truth; `_vendor/` is generated and gitignored. + +## Quickstart + +```python +from lite3 import Lite3 + +# construct +msg = Lite3.from_dict({"event": "ping", "headers": {"id": "req_9f"}, "tags": ["a", "b"]}) +msg = Lite3() # empty object, then msg["k"] = v +arr = Lite3.new_array() # empty array, then arr.root().append(v) + +# wire protocol +wire = msg.to_bytes() # copy of the buffer +sock.send(memoryview(msg)) # zero-copy send — no serialize step +got = Lite3.from_bytes(wire) # ingest a received buffer + +# read (lazy — only touched fields are read) +got["event"] # "ping" +got["headers"]["id"] # "req_9f" — nested proxy, still no copy +got["tags"][1] # "b" — array proxy + +# write (typed): None/bool/int/float/str/bytes/dict/list +got["hops"] = 1 # then forward got.to_bytes() +arr.root().append({"k": "v"}) +arr.root()[0] = 100 # array index overwrite +``` + +`memoryview(msg)` *is* the on-wire format. Send it on a socket, mmap it, hand it +to another process — no encode pass. 
+ +## API + +Construct: + +| | | +|---|---| +| `Lite3()` | empty object, ready for `msg[key] = v` | +| `Lite3.new_array()` | empty array, use `.root().append(...)` | +| `Lite3.from_dict(d)` | build from a Python dict/list (routes through JSON) | +| `Lite3.from_json(s)` | build from a JSON `str` or `bytes` | +| `Lite3.from_bytes(b)` | ingest a received Lite³ buffer (copies it in) | + +Read (on `Lite3` and the nested `_ObjView` / `_ArrView` proxies): + +| | | +|---|---| +| `msg[key]` / `msg[i]` | lazy field read; raises `KeyError`/`IndexError` if absent | +| `obj.get(key, default)` | object only; no raise | +| `key in obj` | membership | +| `len(x)` | entry/element count | +| `obj.keys()`, `obj.items()`, `iter(obj)` | object enumeration | +| `iter(arr)` | array iteration | + +Write: + +| | | +|---|---| +| `obj[key] = v` | set/overwrite object field (recurses for dict/list) | +| `arr[i] = v` | overwrite array element (index must be `< len`) | +| `arr.append(v)`, `arr.extend(vs)` | grow an array | + +Serialize: + +| | | +|---|---| +| `memoryview(msg)` | zero-copy view of the wire bytes | +| `msg.to_bytes()` | copy of the wire bytes | +| `msg.to_dict()` / `msg.to_json()` | full hydration (slow paths) | + +## Semantics & limits + +- **Lifetime.** Proxies and the `memoryview` borrow the `Lite3` buffer — keep the + `Lite3` alive while you hold them, or you read freed memory. +- **Writes can relocate buffer nodes.** A held `msg["a"]` proxy may go stale + after a mutation — re-fetch views from the root after writing. +- **`bytes` is binary-path only.** JSON has no bytes type, so `from_dict`/ + `to_dict` base64-encode it. `bytes` round-trips losslessly only via typed + write + `from_bytes`/typed read. `to_dict`/`to_json` are explicit slow paths. +- **Overwriting a value with a longer string/bytes grows the buffer and never reclaims the + old space** (a Lite³ property, not a binding bug). +- **Types**: `None`, `bool`, `int` (i64), `float` (f64), `str`, `bytes`, nested + objects/arrays. 
Object keys must be strings. + +## Tests + +```sh +.venv/bin/python tests/test_roundtrip.py # reads + dict/json round-trip + fuzz +.venv/bin/python tests/test_writes.py # writes, from_bytes, fuzz +``` + +--- + +# Maintainer's guide + +## Architecture + +Three layers. The middle one — the C shim — is what makes the binding possible. + +``` +lite3/_core.pyx Cython: Lite3 + _ObjView/_ArrView proxies, type dispatch + │ (compiled to an importable .so) +src/lite3_shim.c thin C file: #includes the headers so the macros expand HERE, + │ re-exports them as plain extern functions +../../src/*.c the Lite³ library, compiled in unchanged (+ ../../lib/**) +``` + +**Why the shim exists.** Lite³'s ergonomic API (`lite3_ctx_set_str`, +`lite3_ctx_get_i64`, the `LITE3_KEY_DATA` key hasher, the auto-grow retry loops) +is **C preprocessor macros and `static inline` functions** in +`include/lite3_context_api.h`. Those never become symbols in the compiled +`.a`/`.so` — they exist only at compile time. They are therefore unreachable +from `ctypes`/`cffi` over a prebuilt library, and the key-hash/grow logic must +not be reimplemented in Python — doing so reintroduces a class of bug Lite³ has +already had to fix. The shim is a C file, so when it calls `lite3_ctx_set_i64` +the macro expands normally; it wraps that in a real exported function the binding +calls. All the tricky logic stays in tested C. + +## Changing the binding when the C API changes + +Adding or fixing a wrapped call is mechanical — touch **three places in +lockstep**: + +1. `src/lite3_shim.h` — declare the plain function, e.g. + `int l3_get_i64(lite3_ctx *c, size_t ofs, const char *key, int64_t *out);` +2. `src/lite3_shim.c` — one-line body calling the macro/inline: + `{ return lite3_ctx_get_i64(c, ofs, key, out); }` +3. `lite3/_core.pyx` — add the matching line inside `cdef extern from + "lite3_shim.h"`, then call it from a `Lite3`/`_ObjView`/`_ArrView` method. + +Then `.venv/bin/pip install -e .` to rebuild. 
If a C signature changes upstream, +fix it in steps 1–2 (and the `extern` in 3); the Python API stays stable. + +Shim notes: +- `lite3_ctx_set_obj`/`set_arr` are macros containing `return` statements — they + work inside a shim function returning `int` (early returns propagate). +- For strings/bytes the shim returns `(ptr, len)` via `LITE3_STR`/`LITE3_BYTES` + (the generational-pointer safe-access macros); Cython copies into a Python + object immediately. +- Object-key gotcha: iterator keys are NUL-terminated C strings; `key.len` is + **not** the byte length. Read to the NUL (`ptr`), not `ptr[:len]`. + +## Version coupling — important for the wire format + +This binding is **coupled to the Lite³ source it is compiled against** +(`setup.py` globs `../../src/*.c`). Proxies walk byte offsets in the buffer and +`from_bytes`/`to_bytes` are the raw binary format, so: + +- Within one build, producer and consumer always match (same C compiled in). +- **Across versions, compatibility is not guaranteed.** The library README + states the API is unstable, and roadmap items — *"built-in defragmentation + with GC-index"* and *"write formal spec"* — could change the on-wire buffer + layout. Bytes written by one version may not read under another. + +Built against **Lite³ v1.0.0** (`lite3.pc`), repo commit `7b62398`. Record the +lite3 commit/version any distributed build corresponds to, so a format change +doesn't silently produce buffers an older consumer can't read. + +## Publish to PyPI +Set up ``cibuildwheel`` CI. + diff --git a/bindings/python/lite3/__init__.py b/bindings/python/lite3/__init__.py new file mode 100644 index 0000000..7db85c7 --- /dev/null +++ b/bindings/python/lite3/__init__.py @@ -0,0 +1,31 @@ +"""Lite³ — zero-copy serialization, Python binding. 
+ +Build / construct: + from lite3 import Lite3 + msg = Lite3.from_dict({"event": "ping", "n": 3}) # via JSON (slow path) + msg = Lite3() # empty object; then msg["k"] = v + arr = Lite3.new_array() # empty array; then arr.root().append(v) + +Wire protocol: + wire = msg.to_bytes() # copy of the buffer + sock.send(memoryview(msg)) # zero-copy send (no serialize step) + got = Lite3.from_bytes(wire) # ingest a received buffer + +Read (lazy, straight from the buffer — no parsing): + got["event"] # -> "ping" + got["headers"]["content-type"] # nested proxy + got["tags"][0] # array proxy + +Write (typed; bytes/int/float/str/bool/None/dict/list): + msg["hops"] = msg["hops"] + 1 # mutate, then forward msg.to_bytes() + +Notes: +- to_dict()/to_json() are explicit slow paths (full hydration). +- JSON has no bytes type: `bytes` round-trips only via the binary path + (typed write + from_bytes/typed read), not from_dict/to_dict. +- Mutating relocates buffer nodes — re-fetch views from the root after a write + rather than reusing a held proxy. +""" +from ._core import Lite3 + +__all__ = ["Lite3"] diff --git a/bindings/python/lite3/_core.pyx b/bindings/python/lite3/_core.pyx new file mode 100644 index 0000000..4974e79 --- /dev/null +++ b/bindings/python/lite3/_core.pyx @@ -0,0 +1,452 @@ +# cython: language_level=3 +"""Cython core for the Lite³ Python binding. + +Design: lazy zero-copy reads. `Lite3` owns the C context; `_ObjView`/`_ArrView` +read fields out of the serialized buffer on demand (proxies, not hydrated dicts). +`to_dict()`/`from_dict()` and `to_json()`/`from_json()` are the explicit slow +paths — they round-trip through the in-repo yyjson C codec, never reimplemented. 
+""" +import json as _json +from libc.stdint cimport int64_t, uint8_t, uint32_t +from libc.stddef cimport size_t + +cdef extern from "lite3_shim.h": + ctypedef struct lite3_ctx + ctypedef struct l3_iter + lite3_ctx *l3_create(size_t bufsz) + void l3_destroy(lite3_ctx *c) + int l3_init_obj(lite3_ctx *c) + int l3_init_arr(lite3_ctx *c) + int l3_import_from_buf(lite3_ctx *c, const uint8_t *buf, size_t buflen) + int l3_from_json(lite3_ctx *c, const char *json, size_t length) + char *l3_to_json(lite3_ctx *c, size_t ofs) + void l3_free(void *p) + const uint8_t *l3_buf(lite3_ctx *c) + size_t l3_buflen(lite3_ctx *c) + int l3_root_type(lite3_ctx *c) + long l3_count(lite3_ctx *c, size_t ofs) + int l3_get_type(lite3_ctx *c, size_t ofs, const char *key) + int l3_get_bool(lite3_ctx *c, size_t ofs, const char *key, int *out) + int l3_get_i64 (lite3_ctx *c, size_t ofs, const char *key, int64_t *out) + int l3_get_f64 (lite3_ctx *c, size_t ofs, const char *key, double *out) + int l3_get_str (lite3_ctx *c, size_t ofs, const char *key, const char **ptr, size_t *length) + int l3_get_bytes(lite3_ctx *c, size_t ofs, const char *key, const uint8_t **ptr, size_t *length) + int l3_get_obj (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs) + int l3_get_arr (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs) + int l3_arr_get_type(lite3_ctx *c, size_t ofs, uint32_t i) + int l3_arr_get_bool(lite3_ctx *c, size_t ofs, uint32_t i, int *out) + int l3_arr_get_i64 (lite3_ctx *c, size_t ofs, uint32_t i, int64_t *out) + int l3_arr_get_f64 (lite3_ctx *c, size_t ofs, uint32_t i, double *out) + int l3_arr_get_str (lite3_ctx *c, size_t ofs, uint32_t i, const char **ptr, size_t *length) + int l3_arr_get_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t **ptr, size_t *length) + int l3_arr_get_obj (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs) + int l3_arr_get_arr (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs) + int l3_set_null (lite3_ctx *c, size_t ofs, const 
char *key) + int l3_set_bool (lite3_ctx *c, size_t ofs, const char *key, int v) + int l3_set_i64 (lite3_ctx *c, size_t ofs, const char *key, int64_t v) + int l3_set_f64 (lite3_ctx *c, size_t ofs, const char *key, double v) + int l3_set_str (lite3_ctx *c, size_t ofs, const char *key, const char *s, size_t n) + int l3_set_bytes(lite3_ctx *c, size_t ofs, const char *key, const uint8_t *b, size_t n) + int l3_set_obj (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs) + int l3_set_arr (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs) + int l3_app_null (lite3_ctx *c, size_t ofs) + int l3_app_bool (lite3_ctx *c, size_t ofs, int v) + int l3_app_i64 (lite3_ctx *c, size_t ofs, int64_t v) + int l3_app_f64 (lite3_ctx *c, size_t ofs, double v) + int l3_app_str (lite3_ctx *c, size_t ofs, const char *s, size_t n) + int l3_app_bytes(lite3_ctx *c, size_t ofs, const uint8_t *b, size_t n) + int l3_app_obj (lite3_ctx *c, size_t ofs, size_t *out_ofs) + int l3_app_arr (lite3_ctx *c, size_t ofs, size_t *out_ofs) + int l3_arr_set_null (lite3_ctx *c, size_t ofs, uint32_t i) + int l3_arr_set_bool (lite3_ctx *c, size_t ofs, uint32_t i, int v) + int l3_arr_set_i64 (lite3_ctx *c, size_t ofs, uint32_t i, int64_t v) + int l3_arr_set_f64 (lite3_ctx *c, size_t ofs, uint32_t i, double v) + int l3_arr_set_str (lite3_ctx *c, size_t ofs, uint32_t i, const char *s, size_t n) + int l3_arr_set_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t *b, size_t n) + int l3_arr_set_obj (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs) + int l3_arr_set_arr (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs) + l3_iter *l3_iter_new(lite3_ctx *c, size_t ofs) + int l3_iter_next(l3_iter *it, const char **key_ptr, size_t *key_len) + void l3_iter_free(l3_iter *it) + +# lite3_type enum +cdef enum: + T_NULL = 0 + T_BOOL = 1 + T_I64 = 2 + T_F64 = 3 + T_BYTES = 4 + T_STRING = 5 + T_OBJECT = 6 + T_ARRAY = 7 + T_INVALID = 8 + + +cdef class Lite3: + """Owns a Lite³ context. 
Index it like the root object/array for lazy reads.""" + cdef lite3_ctx *c + + def __cinit__(self): + self.c = l3_create(1024) + if self.c is NULL: + raise MemoryError("lite3 context allocation failed") + l3_init_obj(self.c) # empty object by default; from_*/new_array overwrite + + def __dealloc__(self): + if self.c is not NULL: + l3_destroy(self.c) + + @classmethod + def from_json(cls, data): + cdef Lite3 self = cls() + b = data.encode() if isinstance(data, str) else bytes(data) + if l3_from_json(self.c, b, len(b)) < 0: + raise ValueError("invalid JSON for Lite3") + return self + + @classmethod + def from_dict(cls, d): + # Slow path: reuse the tested C JSON decoder rather than hand-building. + return cls.from_json(_json.dumps(d)) + + @classmethod + def from_bytes(cls, data): + """Ingest a received Lite³ buffer (copies it into a fresh context).""" + cdef Lite3 self = cls() + cdef const uint8_t[::1] mv = bytes(data) + if mv.shape[0] == 0: + raise ValueError("invalid Lite3 buffer") + if l3_import_from_buf(self.c, &mv[0], mv.shape[0]) < 0: + raise ValueError("invalid Lite3 buffer") + return self + + @classmethod + def new_array(cls): + """Empty root array, ready for .root().append(...).""" + cdef Lite3 self = cls() + l3_init_arr(self.c) + return self + + def to_bytes(self): + """Copy of the wire buffer. 
For zero-copy send, use memoryview(self).""" + return bytes(memoryview(self)) + + def to_json(self): + cdef char *s = l3_to_json(self.c, 0) + if s is NULL: + raise ValueError("JSON encode failed") + try: + return (s).decode() + finally: + l3_free(s) + + def to_dict(self): + return _json.loads(self.to_json()) + + cdef object _view(self): + cdef int t = l3_root_type(self.c) + if t == T_OBJECT: + return _ObjView(self, 0) + if t == T_ARRAY: + return _ArrView(self, 0) + raise ValueError("uninitialized Lite3 buffer") + + def root(self): + return self._view() + + def __getitem__(self, k): + return self._view()[k] + + def __setitem__(self, k, v): + self._view()[k] = v + + def __len__(self): + return len(self._view()) + + def __contains__(self, k): + return k in self._view() + + def __iter__(self): + return iter(self._view()) + + def keys(self): + return self._view().keys() + + # zero-copy buffer protocol: memoryview(lite3) is the wire bytes, no copy. + def __getbuffer__(self, Py_buffer *buffer, int flags): + cdef const uint8_t *p = l3_buf(self.c) + cdef size_t n = l3_buflen(self.c) + buffer.buf = p + buffer.obj = self + buffer.len = n + buffer.itemsize = 1 + buffer.readonly = 1 + buffer.ndim = 1 + buffer.format = NULL + buffer.shape = NULL + buffer.strides = NULL + buffer.suboffsets = NULL + buffer.internal = NULL + + +cdef object _scalar_by_key(Lite3 owner, size_t ofs, str key): + cdef bytes kb = key.encode() + cdef const char *k = kb + cdef int t = l3_get_type(owner.c, ofs, k) + cdef int64_t iv + cdef double dv + cdef int bv + cdef const char *sp + cdef const uint8_t *bp + cdef size_t sl + cdef size_t sub + if t == T_NULL: + return None + if t == T_BOOL: + l3_get_bool(owner.c, ofs, k, &bv); return bool(bv) + if t == T_I64: + l3_get_i64(owner.c, ofs, k, &iv); return iv + if t == T_F64: + l3_get_f64(owner.c, ofs, k, &dv); return dv + if t == T_STRING: + l3_get_str(owner.c, ofs, k, &sp, &sl); return sp[:sl].decode() + if t == T_BYTES: + l3_get_bytes(owner.c, ofs, k, &bp, 
&sl); return bytes(bp[:sl])
+    if t == T_OBJECT:
+        l3_get_obj(owner.c, ofs, k, &sub); return _ObjView(owner, sub)
+    if t == T_ARRAY:
+        l3_get_arr(owner.c, ofs, k, &sub); return _ArrView(owner, sub)
+    raise KeyError(key)
+
+
+cdef object _scalar_by_index(Lite3 owner, size_t ofs, uint32_t i):
+    cdef int t = l3_arr_get_type(owner.c, ofs, i)
+    cdef int64_t iv
+    cdef double dv
+    cdef int bv
+    cdef const char *sp
+    cdef const uint8_t *bp
+    cdef size_t sl
+    cdef size_t sub
+    if t == T_NULL:
+        return None
+    if t == T_BOOL:
+        l3_arr_get_bool(owner.c, ofs, i, &bv); return bool(bv)
+    if t == T_I64:
+        l3_arr_get_i64(owner.c, ofs, i, &iv); return iv
+    if t == T_F64:
+        l3_arr_get_f64(owner.c, ofs, i, &dv); return dv
+    if t == T_STRING:
+        l3_arr_get_str(owner.c, ofs, i, &sp, &sl); return sp[:sl].decode()
+    if t == T_BYTES:
+        l3_arr_get_bytes(owner.c, ofs, i, &bp, &sl); return bytes(bp[:sl])
+    if t == T_OBJECT:
+        l3_arr_get_obj(owner.c, ofs, i, &sub); return _ObjView(owner, sub)
+    if t == T_ARRAY:
+        l3_arr_get_arr(owner.c, ofs, i, &sub); return _ArrView(owner, sub)
+    raise IndexError(i)
+
+
+cdef int _set_value(Lite3 owner, size_t ofs, str key, object v) except -1:
+    """Write `v` under `key` in the object at buffer offset `ofs`.
+
+    Dispatches on the Python type of `v`: None, bool, int, float, str,
+    bytes/bytearray, dict (fields written recursively through _set_value) and
+    list/tuple (elements written through _append_value).
+
+    Raises TypeError for any other type, and ValueError when the underlying
+    l3_set_* call returns a negative code (the failure convention the other
+    checked l3_* calls in this file use). Returns 0 on success.
+    """
+    cdef bytes kb = key.encode()
+    cdef const char *k = kb
+    cdef bytes sb
+    cdef size_t child
+    cdef int rc = 0
+    if v is None:
+        rc = l3_set_null(owner.c, ofs, k)
+    elif v is True or v is False:  # bool before int (bool is an int subclass)
+        rc = l3_set_bool(owner.c, ofs, k, 1 if v else 0)
+    elif isinstance(v, int):
+        rc = l3_set_i64(owner.c, ofs, k, v)
+    elif isinstance(v, float):
+        rc = l3_set_f64(owner.c, ofs, k, v)
+    elif isinstance(v, str):
+        sb = v.encode()
+        rc = l3_set_str(owner.c, ofs, k, sb, len(sb))
+    elif isinstance(v, (bytes, bytearray)):
+        sb = bytes(v)
+        rc = l3_set_bytes(owner.c, ofs, k, (sb), len(sb))
+    elif isinstance(v, dict):
+        rc = l3_set_obj(owner.c, ofs, k, &child)
+        if rc >= 0:  # `child` is filled in by the call; never use it after a failure
+            for kk, vv in v.items():
+                _set_value(owner, child, kk, vv)
+    elif isinstance(v, (list, tuple)):
+        rc = l3_set_arr(owner.c, ofs, k, &child)
+        if rc >= 0:  # same guard: do not recurse into an unset offset
+            for item in v:
+                _append_value(owner, child, item)
+    else:
+        raise TypeError("unsupported value type: %r" % type(v))
+    if rc < 0:
+        # Previously the return code was dropped and the write failed silently.
+        raise ValueError("lite3 write failed")
+    return 0
+
+
+cdef int _append_value(Lite3 owner, size_t ofs, object v) except -1:
+    """Append `v` to the array at buffer offset `ofs`.
+
+    Uses the same type dispatch as _set_value, calling the l3_app_* family.
+    Nested dicts are filled through _set_value and nested lists/tuples through
+    _append_value.
+
+    Raises TypeError for an unsupported type, and ValueError when the
+    underlying l3_app_* call returns a negative code. Returns 0 on success.
+    """
+    cdef bytes sb
+    cdef size_t child
+    cdef int rc = 0
+    if v is None:
+        rc = l3_app_null(owner.c, ofs)
+    elif v is True or v is False:  # bool before int (bool is an int subclass)
+        rc = l3_app_bool(owner.c, ofs, 1 if v else 0)
+    elif isinstance(v, int):
+        rc = l3_app_i64(owner.c, ofs, v)
+    elif isinstance(v, float):
+        rc = l3_app_f64(owner.c, ofs, v)
+    elif isinstance(v, str):
+        sb = v.encode()
+        rc = l3_app_str(owner.c, ofs, sb, len(sb))
+    elif isinstance(v, (bytes, bytearray)):
+        sb = bytes(v)
+        rc = l3_app_bytes(owner.c, ofs, (sb), len(sb))
+    elif isinstance(v, dict):
+        rc = l3_app_obj(owner.c, ofs, &child)
+        if rc >= 0:  # `child` is filled in by the call; never use it after a failure
+            for kk, vv in v.items():
+                _set_value(owner, child, kk, vv)
+    elif isinstance(v, (list, tuple)):
+        rc = l3_app_arr(owner.c, ofs, &child)
+        if rc >= 0:  # same guard: do not recurse into an unset offset
+            for item in v:
+                _append_value(owner, child, item)
+    else:
+        raise TypeError("unsupported value type: %r" % type(v))
+    if rc < 0:
+        # Previously the return code was dropped and the append failed silently.
+        raise ValueError("lite3 write failed")
+    return 0
+
+
+# ponytail: third copy of the _set_value/_append_value type ladder (differs only
+# in the l3_* family called). A new type means editing three sites. Collapse to one
+# dispatcher parameterized by the leaf-writer if a fourth target ever appears.
+cdef int _set_index_value(Lite3 owner, size_t ofs, uint32_t i, object v) except -1:
+    """Overwrite element `i` of the array at buffer offset `ofs` with `v`.
+
+    Dispatches on the Python type of `v` (None, bool, int, float, str,
+    bytes/bytearray, dict, list/tuple) and calls the matching l3_arr_set_*
+    function. Nested dicts are filled through _set_value and nested
+    lists/tuples through _append_value.
+
+    Raises TypeError for an unsupported type, and ValueError when the
+    underlying l3_arr_set_* call returns a negative code (the failure
+    convention the other checked l3_* calls in this file use). Returns 0 on
+    success.
+    """
+    cdef bytes sb
+    cdef size_t child
+    cdef int rc = 0
+    if v is None:
+        rc = l3_arr_set_null(owner.c, ofs, i)
+    elif v is True or v is False:  # bool before int (bool is an int subclass)
+        rc = l3_arr_set_bool(owner.c, ofs, i, 1 if v else 0)
+    elif isinstance(v, int):
+        rc = l3_arr_set_i64(owner.c, ofs, i, v)
+    elif isinstance(v, float):
+        rc = l3_arr_set_f64(owner.c, ofs, i, v)
+    elif isinstance(v, str):
+        sb = v.encode()
+        rc = l3_arr_set_str(owner.c, ofs, i, sb, len(sb))
+    elif isinstance(v, (bytes, bytearray)):
+        sb = bytes(v)
+        rc = l3_arr_set_bytes(owner.c, ofs, i, (sb), len(sb))
+    elif isinstance(v, dict):
+        rc = l3_arr_set_obj(owner.c, ofs, i, &child)
+        if rc >= 0:  # `child` is filled in by the call; never use it after a failure
+            for kk, vv in v.items():
+                _set_value(owner, child, kk, vv)
+    elif isinstance(v, (list, tuple)):
+        rc = l3_arr_set_arr(owner.c, ofs, i, &child)
+        if rc >= 0:  # same guard: do not recurse into an unset offset
+            for item in v:
+                _append_value(owner, child, item)
+    else:
+        raise TypeError("unsupported value type: %r" % type(v))
+    if rc < 0:
+        # Previously the return code was dropped and the overwrite failed silently.
+        raise ValueError("lite3 write failed")
+    return 0
+
+
+cdef class _ObjView:
+    """Lazy proxy over a Lite³ object at a buffer offset. Reads on demand.
+
+    Writes (__setitem__) are supported, but mutating the buffer can relocate
+    nodes — re-fetch any view from the root after a write rather than reusing
+    a held proxy.
+ """ + cdef Lite3 owner + cdef size_t ofs + + def __cinit__(self, Lite3 owner, size_t ofs): + self.owner = owner + self.ofs = ofs + + def __getitem__(self, str key): + if l3_get_type(self.owner.c, self.ofs, key.encode()) >= T_INVALID: + raise KeyError(key) + return _scalar_by_key(self.owner, self.ofs, key) + + def __setitem__(self, str key, value): + _set_value(self.owner, self.ofs, key, value) + + def get(self, str key, default=None): + if l3_get_type(self.owner.c, self.ofs, key.encode()) >= T_INVALID: + return default + return _scalar_by_key(self.owner, self.ofs, key) + + def __contains__(self, str key): + return l3_get_type(self.owner.c, self.ofs, key.encode()) < T_INVALID + + def __len__(self): + cdef long n = l3_count(self.owner.c, self.ofs) + if n < 0: + raise ValueError("count failed") + return n + + def keys(self): + cdef l3_iter *it = l3_iter_new(self.owner.c, self.ofs) + cdef const char *kp + cdef size_t kl + cdef list out = [] + if it is NULL: + raise MemoryError("iterator alloc failed") + try: + # keys are NUL-terminated C strings; key.len is not the byte length + while l3_iter_next(it, &kp, &kl) == 1: + out.append((kp).decode()) + finally: + l3_iter_free(it) + return out + + def __iter__(self): + return iter(self.keys()) + + def items(self): + return [(k, self[k]) for k in self.keys()] + + def to_dict(self): + return {k: (v.to_dict() if isinstance(v, (_ObjView, _ArrView)) else v) + for k, v in self.items()} + + +cdef class _ArrView: + """Lazy proxy over a Lite³ array at a buffer offset.""" + cdef Lite3 owner + cdef size_t ofs + + def __cinit__(self, Lite3 owner, size_t ofs): + self.owner = owner + self.ofs = ofs + + def __len__(self): + cdef long n = l3_count(self.owner.c, self.ofs) + if n < 0: + raise ValueError("count failed") + return n + + def __getitem__(self, Py_ssize_t i): + cdef Py_ssize_t n = len(self) + if i < 0: + i += n + if i < 0 or i >= n: + raise IndexError(i) + return _scalar_by_index(self.owner, self.ofs, i) + + def __setitem__(self, 
Py_ssize_t i, value): + cdef Py_ssize_t n = len(self) + if i < 0: + i += n + if i < 0 or i >= n: + raise IndexError(i) + _set_index_value(self.owner, self.ofs, i, value) + + def __iter__(self): + for i in range(len(self)): + yield self[i] + + def append(self, value): + _append_value(self.owner, self.ofs, value) + + def extend(self, values): + for v in values: + _append_value(self.owner, self.ofs, v) + + def to_list(self): + return [(v.to_dict() if isinstance(v, (_ObjView, _ArrView)) else v) for v in self] + + to_dict = to_list diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml new file mode 100644 index 0000000..0fb3147 --- /dev/null +++ b/bindings/python/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["setuptools>=64", "Cython>=3"] +build-backend = "setuptools.build_meta" + +[project] +name = "lite3" +version = "0.0.1" +description = "Python binding for the Lite³ zero-copy serialization format" +requires-python = ">=3.11" + +[project.optional-dependencies] +test = ["pytest"] + +[tool.setuptools] +packages = ["lite3"] diff --git a/bindings/python/setup.py b/bindings/python/setup.py new file mode 100644 index 0000000..544871a --- /dev/null +++ b/bindings/python/setup.py @@ -0,0 +1,43 @@ +import shutil +from pathlib import Path +from setuptools import setup, Extension +from Cython.Build import cythonize + +HERE = Path(__file__).resolve().parent +ROOT = HERE.parent.parent # repo root +VENDOR = HERE / "_vendor" + +# The C core lives at the repo root (../../). That's outside this package, so it +# is invisible under build isolation and absent from an sdist. Vendor it in: copy +# when building from the repo, reuse the copy when building from an unpacked sdist. +# This is what makes `pip install .`, sdist, and cibuildwheel work. +# ponytail: wholesale dir copy, no per-file curation — robust to new C files. 
+def vendor(): + if (ROOT / "src" / "lite3.c").is_file(): # building from the repo + for d in ("include", "lib", "src"): + dst = VENDOR / d + shutil.rmtree(dst, ignore_errors=True) + shutil.copytree(ROOT / d, dst) + elif not VENDOR.is_dir(): # not the repo and no bundled copy (sdist carries one) + raise SystemExit("setup.py: C sources not found — build from the repo root or a bundled _vendor/") + +vendor() + +def vsrc(*parts): + return [str(p.relative_to(HERE)) for p in (VENDOR / Path(*parts[:-1])).glob(parts[-1])] + +sources = ( + ["lite3/_core.pyx", "src/lite3_shim.c"] + + vsrc("src", "*.c") + + vsrc("lib", "yyjson", "*.c") + + vsrc("lib", "nibble_base64", "*.c") +) + +ext = Extension( + "lite3._core", + sources=sources, + include_dirs=[str(VENDOR / "include"), str(VENDOR / "lib"), "src"], + extra_compile_args=["-O2", "-std=gnu11", "-DNDEBUG"], +) + +setup(ext_modules=cythonize([ext], language_level=3)) diff --git a/bindings/python/src/lite3_shim.c b/bindings/python/src/lite3_shim.c new file mode 100644 index 0000000..3d1ee5d --- /dev/null +++ b/bindings/python/src/lite3_shim.c @@ -0,0 +1,106 @@ +#include +#include "lite3_context_api.h" +#include "lite3_shim.h" + +lite3_ctx *l3_create(size_t bufsz) { return lite3_ctx_create_with_size(bufsz); } +void l3_destroy(lite3_ctx *c) { lite3_ctx_destroy(c); } +int l3_init_obj(lite3_ctx *c) { return lite3_ctx_init_obj(c); } +int l3_init_arr(lite3_ctx *c) { return lite3_ctx_init_arr(c); } + +int l3_import_from_buf(lite3_ctx *c, const uint8_t *buf, size_t buflen) { return lite3_ctx_import_from_buf(c, buf, buflen); } +int l3_from_json(lite3_ctx *c, const char *json, size_t len) { return lite3_ctx_json_dec(c, json, len); } +char *l3_to_json(lite3_ctx *c, size_t ofs) { size_t n; return lite3_ctx_json_enc(c, ofs, &n); } +void l3_free(void *p) { free(p); } + +const uint8_t *l3_buf(lite3_ctx *c) { return c->buf; } +size_t l3_buflen(lite3_ctx *c) { return c->buflen; } + +int l3_root_type(lite3_ctx *c) { return 
(int)lite3_ctx_get_root_type(c); } + +long l3_count(lite3_ctx *c, size_t ofs) { + uint32_t n; + if (lite3_ctx_count(c, ofs, &n) < 0) return -1; + return (long)n; +} + +/* by-key */ +int l3_get_type(lite3_ctx *c, size_t ofs, const char *key) { return (int)lite3_ctx_get_type(c, ofs, key); } +int l3_get_bool(lite3_ctx *c, size_t ofs, const char *key, int *out) { + bool b; int r = lite3_ctx_get_bool(c, ofs, key, &b); if (r == 0) *out = b; return r; +} +int l3_get_i64(lite3_ctx *c, size_t ofs, const char *key, int64_t *out) { return lite3_ctx_get_i64(c, ofs, key, out); } +int l3_get_f64(lite3_ctx *c, size_t ofs, const char *key, double *out) { return lite3_ctx_get_f64(c, ofs, key, out); } +int l3_get_str(lite3_ctx *c, size_t ofs, const char *key, const char **ptr, size_t *len) { + lite3_str s; int r = lite3_ctx_get_str(c, ofs, key, &s); if (r < 0) return r; + *ptr = LITE3_STR(c->buf, s); *len = s.len; return r; +} +int l3_get_bytes(lite3_ctx *c, size_t ofs, const char *key, const uint8_t **ptr, size_t *len) { + lite3_bytes b; int r = lite3_ctx_get_bytes(c, ofs, key, &b); if (r < 0) return r; + *ptr = LITE3_BYTES(c->buf, b); *len = b.len; return r; +} +int l3_get_obj(lite3_ctx *c, size_t ofs, const char *key, size_t *o) { return lite3_ctx_get_obj(c, ofs, key, o); } +int l3_get_arr(lite3_ctx *c, size_t ofs, const char *key, size_t *o) { return lite3_ctx_get_arr(c, ofs, key, o); } + +/* by-index */ +int l3_arr_get_type(lite3_ctx *c, size_t ofs, uint32_t i) { return (int)lite3_ctx_arr_get_type(c, ofs, i); } +int l3_arr_get_bool(lite3_ctx *c, size_t ofs, uint32_t i, int *out) { + bool b; int r = lite3_ctx_arr_get_bool(c, ofs, i, &b); if (r == 0) *out = b; return r; +} +int l3_arr_get_i64(lite3_ctx *c, size_t ofs, uint32_t i, int64_t *out) { return lite3_ctx_arr_get_i64(c, ofs, i, out); } +int l3_arr_get_f64(lite3_ctx *c, size_t ofs, uint32_t i, double *out) { return lite3_ctx_arr_get_f64(c, ofs, i, out); } +int l3_arr_get_str(lite3_ctx *c, size_t ofs, uint32_t i, const char 
**ptr, size_t *len) { + lite3_str s; int r = lite3_ctx_arr_get_str(c, ofs, i, &s); if (r < 0) return r; + *ptr = LITE3_STR(c->buf, s); *len = s.len; return r; +} +int l3_arr_get_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t **ptr, size_t *len) { + lite3_bytes b; int r = lite3_ctx_arr_get_bytes(c, ofs, i, &b); if (r < 0) return r; + *ptr = LITE3_BYTES(c->buf, b); *len = b.len; return r; +} +int l3_arr_get_obj(lite3_ctx *c, size_t ofs, uint32_t i, size_t *o) { return lite3_ctx_arr_get_obj(c, ofs, i, o); } +int l3_arr_get_arr(lite3_ctx *c, size_t ofs, uint32_t i, size_t *o) { return lite3_ctx_arr_get_arr(c, ofs, i, o); } + +/* --- writes --- */ +int l3_set_null (lite3_ctx *c, size_t ofs, const char *key) { return lite3_ctx_set_null(c, ofs, key); } +int l3_set_bool (lite3_ctx *c, size_t ofs, const char *key, int v) { return lite3_ctx_set_bool(c, ofs, key, v != 0); } +int l3_set_i64 (lite3_ctx *c, size_t ofs, const char *key, int64_t v) { return lite3_ctx_set_i64(c, ofs, key, v); } +int l3_set_f64 (lite3_ctx *c, size_t ofs, const char *key, double v) { return lite3_ctx_set_f64(c, ofs, key, v); } +int l3_set_str (lite3_ctx *c, size_t ofs, const char *key, const char *s, size_t n) { return lite3_ctx_set_str_n(c, ofs, key, s, n); } +int l3_set_bytes(lite3_ctx *c, size_t ofs, const char *key, const uint8_t *b, size_t n) { return lite3_ctx_set_bytes(c, ofs, key, b, n); } +int l3_set_obj (lite3_ctx *c, size_t ofs, const char *key, size_t *o) { return lite3_ctx_set_obj(c, ofs, key, o); } +int l3_set_arr (lite3_ctx *c, size_t ofs, const char *key, size_t *o) { return lite3_ctx_set_arr(c, ofs, key, o); } + +int l3_arr_set_null (lite3_ctx *c, size_t ofs, uint32_t i) { return lite3_ctx_arr_set_null(c, ofs, i); } +int l3_arr_set_bool (lite3_ctx *c, size_t ofs, uint32_t i, int v) { return lite3_ctx_arr_set_bool(c, ofs, i, v != 0); } +int l3_arr_set_i64 (lite3_ctx *c, size_t ofs, uint32_t i, int64_t v) { return lite3_ctx_arr_set_i64(c, ofs, i, v); } +int l3_arr_set_f64 
(lite3_ctx *c, size_t ofs, uint32_t i, double v) { return lite3_ctx_arr_set_f64(c, ofs, i, v); } +int l3_arr_set_str (lite3_ctx *c, size_t ofs, uint32_t i, const char *s, size_t n) { return lite3_ctx_arr_set_str_n(c, ofs, i, s, n); } +int l3_arr_set_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t *b, size_t n) { return lite3_ctx_arr_set_bytes(c, ofs, i, b, n); } +int l3_arr_set_obj (lite3_ctx *c, size_t ofs, uint32_t i, size_t *o) { return lite3_ctx_arr_set_obj(c, ofs, i, o); } +int l3_arr_set_arr (lite3_ctx *c, size_t ofs, uint32_t i, size_t *o) { return lite3_ctx_arr_set_arr(c, ofs, i, o); } + +int l3_app_null (lite3_ctx *c, size_t ofs) { return lite3_ctx_arr_append_null(c, ofs); } +int l3_app_bool (lite3_ctx *c, size_t ofs, int v) { return lite3_ctx_arr_append_bool(c, ofs, v != 0); } +int l3_app_i64 (lite3_ctx *c, size_t ofs, int64_t v) { return lite3_ctx_arr_append_i64(c, ofs, v); } +int l3_app_f64 (lite3_ctx *c, size_t ofs, double v) { return lite3_ctx_arr_append_f64(c, ofs, v); } +int l3_app_str (lite3_ctx *c, size_t ofs, const char *s, size_t n) { return lite3_ctx_arr_append_str_n(c, ofs, s, n); } +int l3_app_bytes(lite3_ctx *c, size_t ofs, const uint8_t *b, size_t n) { return lite3_ctx_arr_append_bytes(c, ofs, b, n); } +int l3_app_obj (lite3_ctx *c, size_t ofs, size_t *o) { return lite3_ctx_arr_append_obj(c, ofs, o); } +int l3_app_arr (lite3_ctx *c, size_t ofs, size_t *o) { return lite3_ctx_arr_append_arr(c, ofs, o); } + +/* heap iterator: keeps ctx + lite3_iter together so Cython treats it opaquely */ +struct l3_iter { lite3_ctx *c; lite3_iter it; }; + +l3_iter *l3_iter_new(lite3_ctx *c, size_t ofs) { + l3_iter *h = malloc(sizeof *h); + if (!h) return NULL; + h->c = c; + if (lite3_ctx_iter_create(c, ofs, &h->it) < 0) { free(h); return NULL; } + return h; +} +int l3_iter_next(l3_iter *h, const char **key_ptr, size_t *key_len) { + lite3_str key; + int r = lite3_ctx_iter_next(h->c, &h->it, &key, NULL); + if (r == LITE3_ITER_ITEM) { *key_ptr = 
LITE3_STR(h->c->buf, key); *key_len = key.len; } + return r; +} +void l3_iter_free(l3_iter *h) { free(h); } diff --git a/bindings/python/src/lite3_shim.h b/bindings/python/src/lite3_shim.h new file mode 100644 index 0000000..3ac920f --- /dev/null +++ b/bindings/python/src/lite3_shim.h @@ -0,0 +1,90 @@ +/* Lite3 Python binding shim — re-exposes the macro/inline Context API as plain + * extern symbols so Cython/ctypes can reach it. The tricky key-hash + auto-grow + * logic stays in C (it lives in the macros), never reimplemented in Python. */ +#ifndef LITE3_SHIM_H +#define LITE3_SHIM_H +#include <stddef.h> +#include <stdint.h> + +typedef struct lite3_ctx lite3_ctx; /* opaque to Cython */ +typedef struct l3_iter l3_iter; /* opaque heap iterator */ + +/* lifecycle */ +lite3_ctx *l3_create(size_t bufsz); +void l3_destroy(lite3_ctx *c); +int l3_init_obj(lite3_ctx *c); +int l3_init_arr(lite3_ctx *c); + +/* ingest a received Lite3 buffer (copies into the context) */ +int l3_import_from_buf(lite3_ctx *c, const uint8_t *buf, size_t buflen); + +/* JSON round-trip (reuses in-repo yyjson) */ +int l3_from_json(lite3_ctx *c, const char *json, size_t len); +char *l3_to_json(lite3_ctx *c, size_t ofs); /* malloc'd NUL-string; l3_free it */ +void l3_free(void *p); + +/* zero-copy buffer access (buffer protocol) */ +const uint8_t *l3_buf(lite3_ctx *c); +size_t l3_buflen(lite3_ctx *c); + +/* structure introspection */ +int l3_root_type(lite3_ctx *c); /* lite3_type or INVALID */ +long l3_count(lite3_ctx *c, size_t ofs); /* entries/elems, -1 err */ + +/* by-key reads (objects) */ +int l3_get_type(lite3_ctx *c, size_t ofs, const char *key); +int l3_get_bool(lite3_ctx *c, size_t ofs, const char *key, int *out); +int l3_get_i64 (lite3_ctx *c, size_t ofs, const char *key, int64_t *out); +int l3_get_f64 (lite3_ctx *c, size_t ofs, const char *key, double *out); +int l3_get_str (lite3_ctx *c, size_t ofs, const char *key, const char **ptr, size_t *len); +int l3_get_bytes(lite3_ctx *c, size_t ofs, const char *key, 
const uint8_t **ptr, size_t *len); +int l3_get_obj (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs); +int l3_get_arr (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs); + +/* by-index reads (arrays) */ +int l3_arr_get_type(lite3_ctx *c, size_t ofs, uint32_t i); +int l3_arr_get_bool(lite3_ctx *c, size_t ofs, uint32_t i, int *out); +int l3_arr_get_i64 (lite3_ctx *c, size_t ofs, uint32_t i, int64_t *out); +int l3_arr_get_f64 (lite3_ctx *c, size_t ofs, uint32_t i, double *out); +int l3_arr_get_str (lite3_ctx *c, size_t ofs, uint32_t i, const char **ptr, size_t *len); +int l3_arr_get_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t **ptr, size_t *len); +int l3_arr_get_obj (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs); +int l3_arr_get_arr (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs); + +/* by-key writes (objects). str/bytes take explicit length. obj/arr return the + * new container's offset so children can be written into it. */ +int l3_set_null (lite3_ctx *c, size_t ofs, const char *key); +int l3_set_bool (lite3_ctx *c, size_t ofs, const char *key, int v); +int l3_set_i64 (lite3_ctx *c, size_t ofs, const char *key, int64_t v); +int l3_set_f64 (lite3_ctx *c, size_t ofs, const char *key, double v); +int l3_set_str (lite3_ctx *c, size_t ofs, const char *key, const char *s, size_t n); +int l3_set_bytes(lite3_ctx *c, size_t ofs, const char *key, const uint8_t *b, size_t n); +int l3_set_obj (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs); +int l3_set_arr (lite3_ctx *c, size_t ofs, const char *key, size_t *out_ofs); + +/* array set-by-index (overwrite; index must be < length) */ +int l3_arr_set_null (lite3_ctx *c, size_t ofs, uint32_t i); +int l3_arr_set_bool (lite3_ctx *c, size_t ofs, uint32_t i, int v); +int l3_arr_set_i64 (lite3_ctx *c, size_t ofs, uint32_t i, int64_t v); +int l3_arr_set_f64 (lite3_ctx *c, size_t ofs, uint32_t i, double v); +int l3_arr_set_str (lite3_ctx *c, size_t ofs, uint32_t i, const char 
*s, size_t n); +int l3_arr_set_bytes(lite3_ctx *c, size_t ofs, uint32_t i, const uint8_t *b, size_t n); +int l3_arr_set_obj (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs); +int l3_arr_set_arr (lite3_ctx *c, size_t ofs, uint32_t i, size_t *out_ofs); + +/* array append */ +int l3_app_null (lite3_ctx *c, size_t ofs); +int l3_app_bool (lite3_ctx *c, size_t ofs, int v); +int l3_app_i64 (lite3_ctx *c, size_t ofs, int64_t v); +int l3_app_f64 (lite3_ctx *c, size_t ofs, double v); +int l3_app_str (lite3_ctx *c, size_t ofs, const char *s, size_t n); +int l3_app_bytes(lite3_ctx *c, size_t ofs, const uint8_t *b, size_t n); +int l3_app_obj (lite3_ctx *c, size_t ofs, size_t *out_ofs); +int l3_app_arr (lite3_ctx *c, size_t ofs, size_t *out_ofs); + +/* object key enumeration (heap iterator) */ +l3_iter *l3_iter_new(lite3_ctx *c, size_t ofs); +int l3_iter_next(l3_iter *it, const char **key_ptr, size_t *key_len); /* 1 item, 0 done, <0 err */ +void l3_iter_free(l3_iter *it); + +#endif diff --git a/bindings/python/tests/test_roundtrip.py b/bindings/python/tests/test_roundtrip.py new file mode 100644 index 0000000..d3e9543 --- /dev/null +++ b/bindings/python/tests/test_roundtrip.py @@ -0,0 +1,86 @@ +"""Run: bindings/python/.venv/bin/python -m pytest tests/ (or just run this file).""" +import json +import random +import string + +from lite3 import Lite3 + +SAMPLE = { + "event": "http_request", + "method": "POST", + "duration_ms": 47, + "ok": True, + "ratio": 1.5, + "missing": None, + "headers": {"content-type": "application/json", "x-request-id": "req_9f8e2a"}, + "tags": ["a", "b", "c"], + "nested": {"deep": {"n": 1, "list": [1, 2, {"k": "v"}]}}, +} + + +def test_dict_roundtrip(): + m = Lite3.from_dict(SAMPLE) + assert m.to_dict() == SAMPLE + + +def test_lazy_reads(): + m = Lite3.from_dict(SAMPLE) + assert m["method"] == "POST" + assert m["duration_ms"] == 47 + assert m["ok"] is True + assert m["ratio"] == 1.5 + assert m["missing"] is None + assert m["headers"]["content-type"] 
== "application/json" # nested proxy + assert m["tags"][1] == "b" # array proxy + assert m["nested"]["deep"]["list"][2]["k"] == "v" # deep mixed + assert len(m["tags"]) == 3 + assert "headers" in m + assert set(m["headers"].keys()) == {"content-type", "x-request-id"} + + +def test_zero_copy_buffer(): + m = Lite3.from_dict(SAMPLE) + mv = memoryview(m) + assert mv.readonly and len(mv) > 0 + # round-trips through the wire bytes via JSON oracle + assert Lite3.from_json(m.to_json()).to_dict() == SAMPLE + + +def test_json_oracle_roundtrip(): + m = Lite3.from_dict(SAMPLE) + assert json.loads(m.to_json()) == SAMPLE + + +# --- fuzz: random nested structures through from_dict -> to_dict and to/from_json +def _rand(depth, rng): + if depth <= 0 or rng.random() < 0.3: + return rng.choice([ + rng.randint(-10**12, 10**12), + round(rng.uniform(-1e6, 1e6), 6), + "".join(rng.choices(string.ascii_letters + " _-", k=rng.randint(0, 12))), + rng.choice([True, False]), + None, + ]) + if rng.random() < 0.5: + return [_rand(depth - 1, rng) for _ in range(rng.randint(0, 5))] + return { + "k%d" % i: _rand(depth - 1, rng) + for i in range(rng.randint(0, 5)) + } + + +def test_fuzz_roundtrip(): + rng = random.Random(1337) + for _ in range(500): + top = {"f%d" % i: _rand(3, rng) for i in range(rng.randint(1, 6))} + m = Lite3.from_dict(top) + assert m.to_dict() == top, top + assert Lite3.from_json(m.to_json()).to_dict() == top, top + + +if __name__ == "__main__": + for name, fn in sorted(globals().items()): + if name.startswith("test_") and callable(fn): + fn() + print("ok", name) + print("ALL PASSED") diff --git a/bindings/python/tests/test_writes.py b/bindings/python/tests/test_writes.py new file mode 100644 index 0000000..dbb3cdf --- /dev/null +++ b/bindings/python/tests/test_writes.py @@ -0,0 +1,130 @@ +"""Write API + wire round-trip. 
Run with the read tests via: + bindings/python/.venv/bin/python tests/test_writes.py""" +import random +import string + +from lite3 import Lite3 + +# JSON-safe (from_dict/to_dict go through JSON, which has no bytes type) +SAMPLE = { + "event": "http_request", + "n": 47, + "ok": True, + "ratio": 1.5, + "missing": None, + "headers": {"content-type": "application/json", "id": "req_9f8e2a"}, + "tags": ["a", "b", "c"], + "nested": {"deep": {"n": 1, "list": [1, 2, {"k": "v"}]}}, +} + + +def test_from_bytes_roundtrip(): + # produce wire bytes, then ingest them as if received from a socket + sent = Lite3.from_dict(SAMPLE) + wire = sent.to_bytes() + got = Lite3.from_bytes(wire) + assert got.to_dict() == SAMPLE + # zero-copy view equals the copy + assert bytes(memoryview(got)) == wire + + +def test_typed_writes_scalar(): + m = Lite3() # empty object + m["event"] = "ping" + m["n"] = 7 + m["ok"] = False + m["ratio"] = 2.5 + m["missing"] = None + assert m.to_dict() == { + "event": "ping", "n": 7, "ok": False, "ratio": 2.5, "missing": None, + } + + +def test_typed_writes_nested(): + m = Lite3() + m["headers"] = {"a": 1, "b": {"c": [1, 2, 3]}} + m["list"] = [1, "two", {"three": 3}, [4, 5]] + assert m.to_dict() == { + "headers": {"a": 1, "b": {"c": [1, 2, 3]}}, + "list": [1, "two", {"three": 3}, [4, 5]], + } + + +def test_array_root_and_append(): + a = Lite3.new_array() + a.root().append(1) + a.root().append("x") + a.root().extend([{"k": "v"}, [9]]) + assert a.to_dict() == [1, "x", {"k": "v"}, [9]] + + +def test_array_index_overwrite(): + a = Lite3.new_array() + a.root().extend([1, "two", 3, [9]]) + r = a.root() + r[0] = 100 # scalar overwrite + r[1] = "TWO" # str (variable length) + r[-1] = {"k": "v"} # negative index, container overwrite + assert a.to_dict() == [100, "TWO", 3, {"k": "v"}] + try: + r[10] = 1 + except IndexError: + pass + else: + raise AssertionError("out-of-bounds index did not raise") + # nested array overwrite, then wire round-trip + m = Lite3.from_dict({"xs": 
[1, 2, 3]}) + m["xs"][1] = 22 + assert Lite3.from_bytes(m.to_bytes()).to_dict() == {"xs": [1, 22, 3]} + + +def test_mutate_and_forward(): + # receive -> overwrite a field -> forward. Re-fetch from root after write. + msg = Lite3.from_bytes(Lite3.from_dict({"hops": 0, "path": "a"}).to_bytes()) + msg["hops"] = msg["hops"] + 1 + assert msg["hops"] == 1 + assert Lite3.from_bytes(msg.to_bytes()).to_dict() == {"hops": 1, "path": "a"} + + +def test_bytes_via_binary_path(): + # bytes survive only through the binary path (not JSON). Write typed, read typed. + m = Lite3() + m["blob"] = b"\x00\x01\xff\xfe" + assert m["blob"] == b"\x00\x01\xff\xfe" + # and across a wire round-trip + assert Lite3.from_bytes(m.to_bytes())["blob"] == b"\x00\x01\xff\xfe" + + +# --- fuzz: build via typed writes (NOT from_dict), exercising the offset logic +def _rand(depth, rng): + if depth <= 0 or rng.random() < 0.3: + return rng.choice([ + rng.randint(-10**12, 10**12), + round(rng.uniform(-1e6, 1e6), 6), + "".join(rng.choices(string.ascii_letters, k=rng.randint(0, 10))), + rng.choice([True, False]), + None, + ]) + if rng.random() < 0.5: + return [_rand(depth - 1, rng) for _ in range(rng.randint(0, 5))] + return {"k%d" % i: _rand(depth - 1, rng) for i in range(rng.randint(0, 5))} + + +def test_fuzz_typed_writes(): + rng = random.Random(99) + for _ in range(500): + top = {"f%d" % i: _rand(3, rng) for i in range(rng.randint(1, 6))} + m = Lite3() + for k, v in top.items(): + m[k] = v + assert m.to_dict() == top, top + # and survives a wire round-trip + assert Lite3.from_bytes(m.to_bytes()).to_dict() == top, top + + +if __name__ == "__main__": + for name, fn in sorted(globals().items()): + if name.startswith("test_") and callable(fn): + fn() + print("ok", name) + print("ALL PASSED") From 8fee994670006e41db08686545cad8cc0a079d16 Mon Sep 17 00:00:00 2001 From: Ben Kaye Date: Tue, 23 Jun 2026 13:42:43 +0100 Subject: [PATCH 2/2] Harden Python bindings: write safety, view invalidation, type stubs - Check 
return codes on all l3_* writes (_chk); raise on buffer-full/overflow - Raise OverflowError on ints outside int64 range instead of silent C wrap - Block mutation while a memoryview is exported (BufferError) - Invalidate stale sub-views after a realloc via a write generation counter; views that did the write stay valid - Add __repr__/__eq__/__hash__ on Lite3 and cheap reprs on _ObjView/_ArrView - Ship PEP 561 stubs (__init__.pyi, py.typed) via pyproject package-data --- bindings/python/MANIFEST.in | 1 + bindings/python/lite3/__init__.pyi | 53 ++++++++++ bindings/python/lite3/_core.pyx | 140 ++++++++++++++++++++++----- bindings/python/lite3/py.typed | 0 bindings/python/pyproject.toml | 3 + bindings/python/setup.py | 2 +- bindings/python/tests/test_writes.py | 84 ++++++++++++++++ 7 files changed, 258 insertions(+), 25 deletions(-) create mode 100644 bindings/python/lite3/__init__.pyi create mode 100644 bindings/python/lite3/py.typed diff --git a/bindings/python/MANIFEST.in b/bindings/python/MANIFEST.in index d8077b3..a20a4ef 100644 --- a/bindings/python/MANIFEST.in +++ b/bindings/python/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include _vendor *.c *.h LICENSE recursive-include src *.c *.h include lite3/_core.pyx +include lite3/py.typed lite3/*.pyi diff --git a/bindings/python/lite3/__init__.pyi b/bindings/python/lite3/__init__.pyi new file mode 100644 index 0000000..9982d67 --- /dev/null +++ b/bindings/python/lite3/__init__.pyi @@ -0,0 +1,53 @@ +from typing import Any, Iterator + +# Scalar leaf values that survive a binary round-trip. +_Scalar = None | bool | int | float | str | bytes +# What writes accept (nested containers allowed). +_Value = _Scalar | dict[str, Any] | list[Any] | tuple[Any, ...] + +class _ObjView: + def __getitem__(self, key: str) -> Any: ... + def __setitem__(self, key: str, value: _Value) -> None: ... + def __contains__(self, key: str) -> bool: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[str]: ... 
+ def __repr__(self) -> str: ... + def get(self, key: str, default: Any = ...) -> Any: ... + def keys(self) -> list[str]: ... + def items(self) -> list[tuple[str, Any]]: ... + def to_dict(self) -> dict[str, Any]: ... + +class _ArrView: + def __getitem__(self, i: int) -> Any: ... + def __setitem__(self, i: int, value: _Value) -> None: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Any]: ... + def __repr__(self) -> str: ... + def append(self, value: _Value) -> None: ... + def extend(self, values: list[Any] | tuple[Any, ...]) -> None: ... + def to_list(self) -> list[Any]: ... + def to_dict(self) -> list[Any]: ... + +class Lite3: + def __init__(self) -> None: ... + @classmethod + def from_json(cls, data: str | bytes) -> Lite3: ... + @classmethod + def from_dict(cls, d: dict[str, Any]) -> Lite3: ... + @classmethod + def from_bytes(cls, data: bytes | bytearray | memoryview) -> Lite3: ... + @classmethod + def new_array(cls) -> Lite3: ... + def to_bytes(self) -> bytes: ... + def to_json(self) -> str: ... + def to_dict(self) -> dict[str, Any] | list[Any]: ... + def root(self) -> _ObjView | _ArrView: ... + def keys(self) -> list[str]: ... + def __getitem__(self, k: str | int) -> Any: ... + def __setitem__(self, k: str | int, v: _Value) -> None: ... + def __contains__(self, k: str) -> bool: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Any]: ... + def __repr__(self) -> str: ... + def __eq__(self, other: object) -> bool: ... + def __hash__(self) -> int: ... diff --git a/bindings/python/lite3/_core.pyx b/bindings/python/lite3/_core.pyx index 4974e79..420162b 100644 --- a/bindings/python/lite3/_core.pyx +++ b/bindings/python/lite3/_core.pyx @@ -81,10 +81,26 @@ cdef enum: T_ARRAY = 7 T_INVALID = 8 +# int64 range as Python ints; outside this a Python int would silently wrap on +# the C cast. Kept as Python objects so the comparison stays a PyObject compare +# (a cdef int64_t folds to an out-of-range C literal the compiler warns on). 
+cdef object _I64_MIN = -0x8000000000000000 +cdef object _I64_MAX = 0x7fffffffffffffff + + +cdef inline int _chk(int r) except -1: + # The C write API returns <0 on buffer-full / overflow / bad offset. + if r < 0: + raise ValueError("lite3 write failed (buffer full or invalid offset)") + return 0 + cdef class Lite3: """Owns a Lite³ context. Index it like the root object/array for lazy reads.""" cdef lite3_ctx *c + cdef int _exports # live memoryview count; writes are blocked while >0 + cdef int _gen # write generation; bumped after each mutation so held + # sub-views can detect that the buffer moved under them def __cinit__(self): self.c = l3_create(1024) @@ -92,6 +108,12 @@ cdef class Lite3: raise MemoryError("lite3 context allocation failed") l3_init_obj(self.c) # empty object by default; from_*/new_array overwrite + cdef int _check_writable(self) except -1: + # A write can realloc the buffer, dangling any exported memoryview. + if self._exports > 0: + raise BufferError("cannot mutate Lite3 while a memoryview is held") + return 0 + def __dealloc__(self): if self.c is not NULL: l3_destroy(self.c) @@ -187,6 +209,24 @@ cdef class Lite3: buffer.strides = NULL buffer.suboffsets = NULL buffer.internal = NULL + self._exports += 1 + + def __releasebuffer__(self, Py_buffer *buffer): + self._exports -= 1 + + def __repr__(self): + cdef int t = l3_root_type(self.c) + kind = "array" if t == T_ARRAY else "object" if t == T_OBJECT else "uninitialized" + return "<Lite3 %s, %d bytes>" % (kind, l3_buflen(self.c)) + + def __eq__(self, other): + # Wire-buffer equality: same serialized bytes == equal. 
+ if not isinstance(other, Lite3): + return NotImplemented + return bytes(memoryview(self)) == bytes(memoryview(other)) + + def __hash__(self): + return hash(bytes(memoryview(self))) cdef object _scalar_by_key(Lite3 owner, size_t ofs, str key): @@ -248,64 +288,72 @@ cdef object _scalar_by_index(Lite3 owner, size_t ofs, uint32_t i): cdef int _set_value(Lite3 owner, size_t ofs, str key, object v) except -1: + owner._check_writable() cdef bytes kb = key.encode() cdef const char *k = kb cdef bytes sb cdef size_t child if v is None: - l3_set_null(owner.c, ofs, k) + _chk(l3_set_null(owner.c, ofs, k)) elif v is True or v is False: # bool before int (bool is an int subclass) - l3_set_bool(owner.c, ofs, k, 1 if v else 0) + _chk(l3_set_bool(owner.c, ofs, k, 1 if v else 0)) elif isinstance(v, int): - l3_set_i64(owner.c, ofs, k, v) + if v < _I64_MIN or v > _I64_MAX: + raise OverflowError("int exceeds 64-bit range: %r" % v) + _chk(l3_set_i64(owner.c, ofs, k, v)) elif isinstance(v, float): - l3_set_f64(owner.c, ofs, k, v) + _chk(l3_set_f64(owner.c, ofs, k, v)) elif isinstance(v, str): sb = v.encode() - l3_set_str(owner.c, ofs, k, sb, len(sb)) + _chk(l3_set_str(owner.c, ofs, k, sb, len(sb))) elif isinstance(v, (bytes, bytearray)): sb = bytes(v) - l3_set_bytes(owner.c, ofs, k, (sb), len(sb)) + _chk(l3_set_bytes(owner.c, ofs, k, (sb), len(sb))) elif isinstance(v, dict): - l3_set_obj(owner.c, ofs, k, &child) + _chk(l3_set_obj(owner.c, ofs, k, &child)) for kk, vv in v.items(): _set_value(owner, child, kk, vv) elif isinstance(v, (list, tuple)): - l3_set_arr(owner.c, ofs, k, &child) + _chk(l3_set_arr(owner.c, ofs, k, &child)) for item in v: _append_value(owner, child, item) else: raise TypeError("unsupported value type: %r" % type(v)) + owner._gen += 1 # buffer may have moved; held sub-views are now stale return 0 cdef int _append_value(Lite3 owner, size_t ofs, object v) except -1: + owner._check_writable() cdef bytes sb cdef size_t child if v is None: - l3_app_null(owner.c, ofs) + 
_chk(l3_app_null(owner.c, ofs)) elif v is True or v is False: - l3_app_bool(owner.c, ofs, 1 if v else 0) + _chk(l3_app_bool(owner.c, ofs, 1 if v else 0)) elif isinstance(v, int): - l3_app_i64(owner.c, ofs, v) + if v < _I64_MIN or v > _I64_MAX: + raise OverflowError("int exceeds 64-bit range: %r" % v) + _chk(l3_app_i64(owner.c, ofs, v)) elif isinstance(v, float): - l3_app_f64(owner.c, ofs, v) + _chk(l3_app_f64(owner.c, ofs, v)) elif isinstance(v, str): sb = v.encode() - l3_app_str(owner.c, ofs, sb, len(sb)) + _chk(l3_app_str(owner.c, ofs, sb, len(sb))) elif isinstance(v, (bytes, bytearray)): sb = bytes(v) - l3_app_bytes(owner.c, ofs, (sb), len(sb)) + _chk(l3_app_bytes(owner.c, ofs, (sb), len(sb))) elif isinstance(v, dict): - l3_app_obj(owner.c, ofs, &child) + _chk(l3_app_obj(owner.c, ofs, &child)) for kk, vv in v.items(): _set_value(owner, child, kk, vv) elif isinstance(v, (list, tuple)): - l3_app_arr(owner.c, ofs, &child) + _chk(l3_app_arr(owner.c, ofs, &child)) for item in v: _append_value(owner, child, item) else: raise TypeError("unsupported value type: %r" % type(v)) + owner._gen += 1 # buffer may have moved; held sub-views are now stale return 0 @@ -313,32 +361,36 @@ cdef int _append_value(Lite3 owner, size_t ofs, object v) except -1: # in the l3_* family called). A new type means editing three sites. Collapse to one # dispatcher parameterized by the leaf-writer if a fourth target ever appears. 
cdef int _set_index_value(Lite3 owner, size_t ofs, uint32_t i, object v) except -1: + owner._check_writable() cdef bytes sb cdef size_t child if v is None: - l3_arr_set_null(owner.c, ofs, i) + _chk(l3_arr_set_null(owner.c, ofs, i)) elif v is True or v is False: - l3_arr_set_bool(owner.c, ofs, i, 1 if v else 0) + _chk(l3_arr_set_bool(owner.c, ofs, i, 1 if v else 0)) elif isinstance(v, int): - l3_arr_set_i64(owner.c, ofs, i, v) + if v < _I64_MIN or v > _I64_MAX: + raise OverflowError("int exceeds 64-bit range: %r" % v) + _chk(l3_arr_set_i64(owner.c, ofs, i, v)) elif isinstance(v, float): - l3_arr_set_f64(owner.c, ofs, i, v) + _chk(l3_arr_set_f64(owner.c, ofs, i, v)) elif isinstance(v, str): sb = v.encode() - l3_arr_set_str(owner.c, ofs, i, sb, len(sb)) + _chk(l3_arr_set_str(owner.c, ofs, i, sb, len(sb))) elif isinstance(v, (bytes, bytearray)): sb = bytes(v) - l3_arr_set_bytes(owner.c, ofs, i, (sb), len(sb)) + _chk(l3_arr_set_bytes(owner.c, ofs, i, (sb), len(sb))) elif isinstance(v, dict): - l3_arr_set_obj(owner.c, ofs, i, &child) + _chk(l3_arr_set_obj(owner.c, ofs, i, &child)) for kk, vv in v.items(): _set_value(owner, child, kk, vv) elif isinstance(v, (list, tuple)): - l3_arr_set_arr(owner.c, ofs, i, &child) + _chk(l3_arr_set_arr(owner.c, ofs, i, &child)) for item in v: _append_value(owner, child, item) else: raise TypeError("unsupported value type: %r" % type(v)) + owner._gen += 1 # buffer may have moved; held sub-views are now stale return 0 @@ -351,34 +403,49 @@ cdef class _ObjView: """ cdef Lite3 owner cdef size_t ofs + cdef int _gen def __cinit__(self, Lite3 owner, size_t ofs): self.owner = owner self.ofs = ofs + self._gen = owner._gen + + cdef int _stale_check(self) except -1: + # A write through some other handle may have relocated this node. 
+ if self._gen != self.owner._gen: + raise RuntimeError("stale Lite3 view: re-fetch from root after a write") + return 0 def __getitem__(self, str key): + self._stale_check() if l3_get_type(self.owner.c, self.ofs, key.encode()) >= T_INVALID: raise KeyError(key) return _scalar_by_key(self.owner, self.ofs, key) def __setitem__(self, str key, value): + self._stale_check() _set_value(self.owner, self.ofs, key, value) + self._gen = self.owner._gen # this view did the write; it stays valid def get(self, str key, default=None): + self._stale_check() if l3_get_type(self.owner.c, self.ofs, key.encode()) >= T_INVALID: return default return _scalar_by_key(self.owner, self.ofs, key) def __contains__(self, str key): + self._stale_check() return l3_get_type(self.owner.c, self.ofs, key.encode()) < T_INVALID def __len__(self): + self._stale_check() cdef long n = l3_count(self.owner.c, self.ofs) if n < 0: raise ValueError("count failed") return n def keys(self): + self._stale_check() cdef l3_iter *it = l3_iter_new(self.owner.c, self.ofs) cdef const char *kp cdef size_t kl @@ -403,17 +470,33 @@ cdef class _ObjView: return {k: (v.to_dict() if isinstance(v, (_ObjView, _ArrView)) else v) for k, v in self.items()} + def __repr__(self): + # Keys only, capped — never hydrate values (could be the whole document). + ks = self.keys() + shown = ", ".join(repr(k) for k in ks[:8]) + if len(ks) > 8: + shown += ", ... 
+%d" % (len(ks) - 8) + return "<_ObjView keys={%s}>" % shown + cdef class _ArrView: """Lazy proxy over a Lite³ array at a buffer offset.""" cdef Lite3 owner cdef size_t ofs + cdef int _gen def __cinit__(self, Lite3 owner, size_t ofs): self.owner = owner self.ofs = ofs + self._gen = owner._gen + + cdef int _stale_check(self) except -1: + if self._gen != self.owner._gen: + raise RuntimeError("stale Lite3 view: re-fetch from root after a write") + return 0 def __len__(self): + self._stale_check() cdef long n = l3_count(self.owner.c, self.ofs) if n < 0: raise ValueError("count failed") @@ -434,19 +517,28 @@ cdef class _ArrView: if i < 0 or i >= n: raise IndexError(i) _set_index_value(self.owner, self.ofs, i, value) + self._gen = self.owner._gen # this view did the write; it stays valid def __iter__(self): for i in range(len(self)): yield self[i] def append(self, value): + self._stale_check() _append_value(self.owner, self.ofs, value) + self._gen = self.owner._gen def extend(self, values): + self._stale_check() for v in values: _append_value(self.owner, self.ofs, v) + self._gen = self.owner._gen def to_list(self): return [(v.to_dict() if isinstance(v, (_ObjView, _ArrView)) else v) for v in self] to_dict = to_list + + def __repr__(self): + # Length only — never hydrate elements. 
+ return "<_ArrView len=%d>" % len(self) diff --git a/bindings/python/lite3/py.typed b/bindings/python/lite3/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 0fb3147..9ad0318 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -13,3 +13,6 @@ test = ["pytest"] [tool.setuptools] packages = ["lite3"] + +[tool.setuptools.package-data] +lite3 = ["py.typed", "*.pyi"] # PEP 561 stubs ship with the wheel diff --git a/bindings/python/setup.py b/bindings/python/setup.py index 544871a..0ed31bf 100644 --- a/bindings/python/setup.py +++ b/bindings/python/setup.py @@ -40,4 +40,4 @@ def vsrc(*parts): extra_compile_args=["-O2", "-std=gnu11", "-DNDEBUG"], ) -setup(ext_modules=cythonize([ext], language_level=3)) +setup(ext_modules=cythonize([ext], language_level=3)) # packages/package-data in pyproject.toml diff --git a/bindings/python/tests/test_writes.py b/bindings/python/tests/test_writes.py index dbb3cdf..33ce03b 100644 --- a/bindings/python/tests/test_writes.py +++ b/bindings/python/tests/test_writes.py @@ -122,6 +122,90 @@ def test_fuzz_typed_writes(): assert Lite3.from_bytes(m.to_bytes()).to_dict() == top, top +def test_int_overflow_raises(): + m = Lite3() + m["ok"] = 2**63 - 1 # max int64, fine + for bad in (2**63, -2**63 - 1, 2**70): + try: + m["x"] = bad + except OverflowError: + pass + else: + raise AssertionError("expected OverflowError for %d" % bad) + + +def test_write_blocked_while_memoryview_held(): + m = Lite3() + m["a"] = 1 + mv = memoryview(m) + try: + m["b"] = 2 # would realloc under a live export -> must refuse + except BufferError: + pass + else: + raise AssertionError("expected BufferError while memoryview held") + finally: + mv.release() + m["b"] = 2 # ok once released + assert m["b"] == 2 + + +def test_stale_view_raises(): + # A held sub-view must raise (not read garbage) once a write goes through + # another handle and relocates buffer 
nodes. + m = Lite3.from_dict({"xs": [1, 2, 3], "tag": "a"}) + xs = m["xs"] + assert xs[0] == 1 # fine before any write + m["tag"] = "this is a much longer value that forces a relocation" + try: + xs[0] # xs is now stale + except RuntimeError: + pass + else: + raise AssertionError("stale view did not raise") + assert m["xs"][0] == 1 # re-fetch from root works + + # Writing *through* a held root view keeps that same view valid. + a = Lite3.new_array() + r = a.root() + r.extend([1, "two", 3]) + r[1] = "a longer string" # relocation, but r did the write + r.append(4) + assert r[0] == 1 and r[3] == 4 # r still usable + assert a.to_dict() == [1, "a longer string", 3, 4] + + +def test_stale_view_mid_iteration(): + # Start consuming an array, let an external write relocate the buffer, + # then resume: the iterator must raise, not yield stale/garbage elements. + m = Lite3.from_dict({"xs": [10, 20, 30, 40], "tag": "a"}) + it = iter(m["xs"]) + assert next(it) == 10 # consumed one element + m["tag"] = "a much longer value that forces the buffer to relocate" + try: + next(it) # resume after external mutation + except RuntimeError: + pass + else: + raise AssertionError("iterator did not detect external mutation") + + +def test_eq_and_hash(): + a = Lite3.from_dict({"x": 1, "y": [1, 2]}) + b = Lite3.from_dict({"x": 1, "y": [1, 2]}) + c = Lite3.from_dict({"x": 2}) + assert a == b and hash(a) == hash(b) + assert a != c + assert a != "not a lite3" + + +def test_repr_is_cheap_and_does_not_hydrate(): + m = Lite3.from_dict({"a": 1, "b": 2, "tags": [1, 2, 3]}) + assert "Lite3" in repr(m) + assert "_ObjView" in repr(m.root()) + assert repr(m["tags"]) == "<_ArrView len=3>" + + if __name__ == "__main__": for name, fn in sorted(globals().items()): if name.startswith("test_") and callable(fn):