From 031d2adc61b519051dd6ad32588b447d373af207 Mon Sep 17 00:00:00 2001 From: samuelburnham <45365069+samuelburnham@users.noreply.github.com> Date: Tue, 24 Mar 2026 15:23:30 -0400 Subject: [PATCH 1/5] chore: Update Readme --- README.md | 151 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 42e4a4a..fb72f23 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Rust bindings to the `lean.h` Lean C FFI, generated with [`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). Bindgen runs in `build.rs` and generates unsafe Rust functions that link to Lean's `lean.h` C library. This external module can then be found at -`target/release/lean-ffi-/out/lean.rs`. +`target/release/build/lean-ffi-/out/lean.rs`. These bindings are then wrapped in a typed Rust API that models Lean's ownership conventions (`lean_obj_arg` vs `b_lean_obj_arg`) using Rust's @@ -13,13 +13,23 @@ type system. The core types are: -- **`LeanOwned`** — An owned reference to a Lean object. `Drop` calls `lean_dec`, - `Clone` calls `lean_inc`. Not `Copy`. Corresponds to `lean_obj_arg` (input) and - `lean_obj_res` (output) in the C FFI. - -- **`LeanBorrowed<'a>`** — A borrowed reference. `Copy`, no `Drop`, lifetime-bounded. - Corresponds to `b_lean_obj_arg` in the C FFI. Used when Lean declares a parameter - with `@&`. +- **`LeanOwned`** — An owned reference to a Lean object. Corresponds to + `lean_obj_arg` (input) and `lean_obj_res` (output) in the C FFI. + - `Drop` calls `lean_dec_ref`, which decrements `m_rc`. This happens automatically + when the value goes out of scope. When `m_rc` reaches zero, Lean frees the object + and recursively decrements its children. + - `Clone` calls `lean_inc_ref`, which increments `m_rc`, creating a second owned + handle to the same underlying object. + - Both skip tagged scalars (bit 0 set — small `Nat`, `Bool`, etc.) and are no-ops + for persistent objects (`m_rc == 0`). + - Not `Copy` — ownership is linear. Each handle must be explicitly cloned or dropped. + +- **`LeanBorrowed<'a>`** — A borrowed reference. Corresponds to `b_lean_obj_arg` in + the C FFI. Used when Lean declares a parameter with `@&`. + - Implements `Copy` (trivial bitwise copy) with no `Drop` — no refcount changes at all. + - The lifetime `'a` ties it to the source reference's scope, preventing use-after-free. + - This makes it safe to copy freely without cost — each copy is just a pointer with no + side effects. - **`LeanShared`** — A thread-safe owned reference. Wraps `LeanOwned` after calling `lean_mark_mt` on the object graph, which transitions all reachable objects to @@ -84,12 +94,90 @@ impl LeanPutResponse { } ``` +### Inductive types and field layout + +Extra care must be taken when dealing with [inductive +types](https://lean-lang.org/doc/reference/latest/The-Type-System/Inductive-Types/#run-time-inductives) +as the runtime memory layout of constructor fields may not match the +declaration order in Lean. Fields are reordered into three groups: + +1. Non-scalar fields (`lean_object*`), in declaration order +2. `USize` fields, in declaration order +3. Other scalar fields, in decreasing order by size, then declaration order within each size + +This means a structure like + +```lean +structure MyStruct where + u8val : UInt8 + obj : Nat + u32val : UInt32 + u64val : UInt64 +``` + +would be laid out as `[obj, u64val, u32val, u8val]` at runtime. Trivial wrapper +types (e.g. `Char` wraps `UInt32`) count as their underlying scalar type. + +A constructor's memory looks like: + +``` +[header (8B)] [object fields (8B each)] [USize fields (8B each)] [scalar data area] +``` + +Object fields and USize fields each occupy 8-byte slots. The scalar data area is a +flat region of bytes containing all remaining scalar field values, packed by +descending size. For `MyStruct` (1 object field, 0 USize fields, 13 scalar bytes): + +- `u64val` occupies bytes 0–7 of the scalar area +- `u32val` occupies bytes 8–11 +- `u8val` occupies byte 12 + +Use `LeanCtor` to access fields at the correct positions. Scalar getters and +setters take `(num_slots, byte_offset)` — `num_slots` is the total number of +8-byte slots (object fields + USize fields) preceding the scalar data area, and +`byte_offset` is the position of the field within that area. + +Readers are generic over `R: LeanRef`, and constructors return `LeanOwned`: + +```rust +impl LeanScalarStruct { + pub fn obj(&self) -> LeanBorrowed<'_> { self.as_ctor().get(0) } + pub fn u64val(&self) -> u64 { self.as_ctor().get_u64(1, 0) } + pub fn u32val(&self) -> u32 { self.as_ctor().get_u32(1, 8) } + pub fn u8val(&self) -> u8 { self.as_ctor().get_u8(1, 12) } +} + +impl LeanScalarStruct { + pub fn mk(obj: LeanNat, u64val: u64, u32val: u32, u8val: u8) -> Self { + let ctor = LeanCtor::alloc(0, 1, 13); // tag 0, 1 obj field, 13 scalar bytes + ctor.set(0, obj); // object field 0 + ctor.set_u64(1, 0, u64val); // 1 slot before scalars, byte 0 + ctor.set_u32(1, 8, u32val); // 1 slot before scalars, byte 8 + ctor.set_u8(1, 12, u8val); // 1 slot before scalars, byte 12 + Self::new(ctor.into()) + } +} +``` + ### External objects (`LeanExternal`) External objects let you store arbitrary Rust data inside a Lean object. Lean sees an opaque type; Rust controls allocation, access, mutation, and cleanup. -**Register** an external class exactly once, using `OnceLock` or `LazyLock`: +**Register** an external class exactly once, using `OnceLock` or `LazyLock`. + +`ExternalClass::register` calls `lean_register_external_class`, which allocates a +class descriptor with two function pointers: a **finalizer** called when the object's +refcount reaches zero to free the Rust data, and a **foreach** callback for Lean +to traverse any embedded `lean_object*` pointers (usually a no-op for pure Rust data). + +`register_with_drop::()` generates a finalizer that calls +`drop(Box::from_raw(ptr.cast::()))` and a no-op foreach — sufficient for any +Rust type that doesn't hold Lean objects. + +Registration must happen exactly once per type. `LazyLock` (or `OnceLock`) ensures +thread-safe one-time initialization, storing the returned `ExternalClass` in a +`static` for reuse across all allocations: ```rust use std::sync::LazyLock; @@ -97,8 +185,6 @@ use lean_ffi::object::{ExternalClass, LeanExternal, LeanOwned, LeanBorrowed}; struct Hasher { state: Vec } -// register_with_drop generates a finalizer that calls drop(Box::from_raw(ptr)) -// and a no-op foreach (no Lean objects inside T to traverse). static HASHER_CLASS: LazyLock = LazyLock::new(ExternalClass::register_with_drop::); ``` @@ -177,39 +263,9 @@ extern "C" fn process( } ``` -## Inductive Types and Field Layout - -Extra care must be taken when dealing with [inductive -types](https://lean-lang.org/doc/reference/latest/The-Type-System/Inductive-Types/#run-time-inductives) -as the runtime memory layout of constructor fields may not match the -declaration order in Lean. Fields are reordered into three groups: - -1. Non-scalar fields (`lean_object*`), in declaration order -2. `USize` fields, in declaration order -3. Other scalar fields, in decreasing order by size, then declaration order within each size - -This means a structure like - -```lean -structure Reorder where - flag : Bool - obj : Array Nat - size : UInt64 -``` - -would be laid out as `[obj, size, flag]` at runtime — the `UInt64` is placed -before the `Bool`. Trivial wrapper types (e.g. `Char` wraps `UInt32`) count as -their underlying scalar type. - -Use `LeanCtor` methods to access fields at the correct offsets: - -```rust -// 1 object field, scalars: u64 at offset 0, u8 (Bool) at offset 8 -let ctor = unsafe { LeanBorrowed::from_raw(ptr.as_raw()) }.as_ctor(); -let obj = ctor.get(0); // object field by index -let size = ctor.get_u64(1, 0); // u64 at scalar offset 0 (past 1 non-scalar field) -let flag = ctor.get_bool(1, 8); // bool at scalar offset 8 -``` +More examples can be found in `src/test_ffi.rs` (Rust FFI implementations) and +`Tests/FFI.lean` (Lean declarations and tests), covering all domain types, scalar +field layouts, external objects, in-place mutation, and ownership patterns. ## In-Place Mutation @@ -285,6 +341,13 @@ bytes including the NUL terminator. `LeanString` wraps these correctly: - `length()` — UTF-8 character count (`m_length`) - `as_str()` — view as `&str` +## References + +- [Lean FFI documentation](https://lean-lang.org/doc/reference/latest/Run-Time-Code/#runtime) +- [`lean.h` C library](https://github.com/leanprover/lean4/blob/master/src/include/lean/lean.h) +- [Counting Immutable Beans paper](https://arxiv.org/pdf/1908.05647) +- [Rust FFI guide](https://doc.rust-lang.org/nomicon/ffi.html) + ## License MIT or Apache 2.0 From 223ad0d0b99fbc63a659b91e499e0d2e82d6b055 Mon Sep 17 00:00:00 2001 From: samuelburnham <45365069+samuelburnham@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:37:02 -0400 Subject: [PATCH 2/5] Clarify and format --- README.md | 248 ++++++++++++++++++++++++++++-------------------- src/nat.rs | 2 +- src/object.rs | 71 +++++++++----- src/test_ffi.rs | 12 +-- 4 files changed, 198 insertions(+), 135 deletions(-) diff --git a/README.md b/README.md index fb72f23..9b529d6 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,100 @@ # lean-ffi -Rust bindings to the `lean.h` Lean C FFI, generated with [`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). -Bindgen runs in `build.rs` and generates unsafe Rust functions that link to -Lean's `lean.h` C library. This external module can then be found at -`target/release/build/lean-ffi-/out/lean.rs`. +Rust bindings to the `lean.h` Lean C FFI, generated with +[`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). Bindgen runs in +`build.rs` and generates unsafe Rust functions that link to Lean's `lean.h` C +library. This module can be found at +`target/release/build/lean-ffi-/out/lean.rs` after running +`cargo build --release`. -These bindings are then wrapped in a typed Rust API that models Lean's -ownership conventions (`lean_obj_arg` vs `b_lean_obj_arg`) using Rust's -type system. +These bindings are then wrapped in a typed Rust API that models Lean's reference +counting system for owned (`lean_obj_arg`) and borrowed (`b_lean_obj_arg`) +references. ## Ownership Model -The core types are: - -- **`LeanOwned`** — An owned reference to a Lean object. Corresponds to - `lean_obj_arg` (input) and `lean_obj_res` (output) in the C FFI. - - `Drop` calls `lean_dec_ref`, which decrements `m_rc`. This happens automatically - when the value goes out of scope. When `m_rc` reaches zero, Lean frees the object - and recursively decrements its children. - - `Clone` calls `lean_inc_ref`, which increments `m_rc`, creating a second owned - handle to the same underlying object. - - Both skip tagged scalars (bit 0 set — small `Nat`, `Bool`, etc.) and are no-ops - for persistent objects (`m_rc == 0`). - - Not `Copy` — ownership is linear. Each handle must be explicitly cloned or dropped. - -- **`LeanBorrowed<'a>`** — A borrowed reference. Corresponds to `b_lean_obj_arg` in - the C FFI. Used when Lean declares a parameter with `@&`. - - Implements `Copy` (trivial bitwise copy) with no `Drop` — no refcount changes at all. - - The lifetime `'a` ties it to the source reference's scope, preventing use-after-free. - - This makes it safe to copy freely without cost — each copy is just a pointer with no - side effects. - -- **`LeanShared`** — A thread-safe owned reference. Wraps `LeanOwned` after calling - `lean_mark_mt` on the object graph, which transitions all reachable objects to - multi-threaded mode with atomic refcounting. `Send + Sync`. Use `borrow()` to get - a `LeanBorrowed<'_>` for reading, `into_owned()` to unwrap back to `LeanOwned`. - -- **`LeanRef`** — Trait implemented by `LeanOwned`, `LeanBorrowed`, and `LeanShared`, - providing shared read-only operations like `as_raw()`, `is_scalar()`, `tag()`, and - unboxing methods. - -All reference types are safe for persistent objects (`m_rc == 0`) — `lean_inc_ref` and -`lean_dec_ref` are no-ops when `m_rc == 0`. +### Background: Lean C API conventions + +In Lean's C API, a **reference** is a `lean_object*` pointer to the header of a +heap-allocated object. Note that a `lean_object*` can also refer to a tagged +scalar value encoded as a pointer-sized data type, where the low bit (tag) of +the pointer is set to 1. In that case it would not be called a reference. + +References in Lean can either be **owned** or **borrowed**. + +An **owned reference**, signified by `lean_obj_arg` in C, uses reference +counting via the `int m_rc` field of the `lean_object` to determine when to free +the underlying object. Before a new reference to the object is created, the Lean +compiler inserts a `lean_inc` call to increment the ref count. When the +reference goes out of scope, the Lean compiler inserts a `lean_dec` call to +decrement the ref count. When `m_rc` reaches 0, the Lean runtime frees the +object. + +A **borrowed reference**, signified by `@&` in Lean and `b_lean_obj_arg` in C, +inherits the reference count of a surrounding owned reference, and is assumed to +be kept alive as long as its parent. This enables the borrowed reference to +dispense with reference counting altogether as it will get dropped when going +out of scope. + +### Rust API + +In order to handle Lean reference counting gracefully in Rust, we use the +following types: + +- **`LeanOwned`** - An owned reference to a Lean object with RAII semantics. + Corresponds to `lean_obj_arg` (input) and `lean_obj_res` (output) in the C + FFI. + - The `Clone` implementation calls `lean_inc` and returns a new `LeanOwned` + reference to the same object. `Copy` is not implemented. + - The `Drop` implementation calls `lean_dec` automatically on scope exit. + - Passing or assigning a `LeanOwned` **moves** it (transferring the + `lean_dec`); use `.clone()` to create a second owned reference via + `lean_inc`. + - [`into_raw`] consumes the wrapper **without** calling `lean_dec`, for + passing ownership to Lean C API functions that take `lean_obj_arg` (which + will `lean_dec` internally). Not needed for returning values from + `extern "C"` functions — returning `LeanOwned` directly works because Rust + does not call `Drop` on return values. + - Tagged scalar values (bit 0 set — small `Nat`, `Bool`, etc.) and persistent + objects (`m_rc == 0`) skip refcount operations entirely. + +- **`LeanBorrowed<'a>`** — A borrowed reference. Corresponds to `b_lean_obj_arg` + in the C FFI. Used when Lean declares a parameter with `@&`. + - The `Copy` and `Clone` implementations perform a trivial bitwise copy. Neither + `Clone` nor `Drop` modify the reference count. + - The lifetime `'a` ties the borrowed reference to the source reference's + scope, preventing use-after-free. + - Call `.to_owned_ref()` to promote to `LeanOwned` (calls `lean_inc`). + - Note: The `b_lean_obj_res` type is used when returning a borrowed reference + in C, but returning it and `LeanBorrowed` are only used internally as Lean + expects owned references at the FFI boundary. + +- **`LeanShared`** — A thread-safe owned reference. Wraps `LeanOwned` after + calling `lean_mark_mt` on the object graph, which transitions all reachable + objects to multi-threaded mode with atomic refcounting. Implements + `Send + Sync`. Use `borrow()` to get a `LeanBorrowed<'_>` for reading, + `into_owned()` to unwrap back to `LeanOwned`. + +- **`LeanRef`** — Trait implemented by `LeanOwned`, `LeanBorrowed`, and + `LeanShared`, providing shared read-only operations like `as_raw()`, + `is_scalar()`, `tag()`, and unboxing methods. + +All reference types are safe for persistent objects and compact memory regions +(`m_rc == 0`) — `lean_inc_ref` and `lean_dec_ref` are no-ops when `m_rc == 0`. ## Domain Types Domain types wrap the ownership types to provide type safety at FFI boundaries. Built-in domain types include `LeanArray`, `LeanString`, `LeanCtor`, -`LeanList`, `LeanOption`, `LeanExcept`, `LeanIOResult`, `LeanProd`, -`LeanNat`, `LeanBool`, `LeanByteArray`, and `LeanExternal`. +`LeanList`, `LeanOption`, `LeanExcept`, `LeanIOResult`, +`LeanProd`, `LeanNat`, `LeanBool`, `LeanByteArray`, and +`LeanExternal`. ### Naming convention -Domain types are prefixed with `Lean` to distinguish them from Lean-side type names -and to match the built-in types. For example, a Lean `Point` structure becomes -`LeanPoint` in Rust. +Domain types are prefixed with `Lean` to distinguish them from Lean-side type +names and to match the built-in types. For example, a Lean `Point` structure +becomes `LeanPoint` in Rust. ### Defining custom domain types @@ -69,10 +109,10 @@ lean_ffi::lean_domain_type! { } ``` -This generates a `#[repr(transparent)]` wrapper with `Clone`, conditional `Copy`, -`inner()`, `as_raw()`, `into_raw()`, and `From` impls. You can then add -accessor methods — readers are generic over `R: LeanRef` (work on both owned -and borrowed), constructors return `LeanOwned`: +This generates a `#[repr(transparent)]` wrapper with `Clone`, `Copy` for +`LeanBorrowed`, `inner()`, `as_raw()`, `into_raw()`, and `From` impls. You can +then add accessor methods — readers are generic over `R: LeanRef` (work on both +owned and borrowed), constructors return `LeanOwned`: ```rust impl LeanPutResponse { @@ -96,14 +136,15 @@ impl LeanPutResponse { ### Inductive types and field layout -Extra care must be taken when dealing with [inductive -types](https://lean-lang.org/doc/reference/latest/The-Type-System/Inductive-Types/#run-time-inductives) -as the runtime memory layout of constructor fields may not match the -declaration order in Lean. Fields are reordered into three groups: +Extra care must be taken when dealing with +[inductive types](https://lean-lang.org/doc/reference/latest/The-Type-System/Inductive-Types/#run-time-inductives) +as the runtime memory layout of constructor fields may not match the declaration +order in Lean. Fields are reordered into three groups: 1. Non-scalar fields (`lean_object*`), in declaration order 2. `USize` fields, in declaration order -3. Other scalar fields, in decreasing order by size, then declaration order within each size +3. Other scalar fields, in decreasing order by size, then declaration order + within each size This means a structure like @@ -124,9 +165,10 @@ A constructor's memory looks like: [header (8B)] [object fields (8B each)] [USize fields (8B each)] [scalar data area] ``` -Object fields and USize fields each occupy 8-byte slots. The scalar data area is a -flat region of bytes containing all remaining scalar field values, packed by -descending size. For `MyStruct` (1 object field, 0 USize fields, 13 scalar bytes): +Object fields and USize fields each occupy 8-byte slots. The scalar data area is +a flat region of bytes containing all remaining scalar field values, packed by +descending size. For `MyStruct` (1 object field, 0 USize fields, 13 scalar +bytes): - `u64val` occupies bytes 0–7 of the scalar area - `u32val` occupies bytes 8–11 @@ -137,8 +179,6 @@ setters take `(num_slots, byte_offset)` — `num_slots` is the total number of 8-byte slots (object fields + USize fields) preceding the scalar data area, and `byte_offset` is the position of the field within that area. -Readers are generic over `R: LeanRef`, and constructors return `LeanOwned`: - ```rust impl LeanScalarStruct { pub fn obj(&self) -> LeanBorrowed<'_> { self.as_ctor().get(0) } @@ -166,18 +206,19 @@ sees an opaque type; Rust controls allocation, access, mutation, and cleanup. **Register** an external class exactly once, using `OnceLock` or `LazyLock`. -`ExternalClass::register` calls `lean_register_external_class`, which allocates a -class descriptor with two function pointers: a **finalizer** called when the object's -refcount reaches zero to free the Rust data, and a **foreach** callback for Lean -to traverse any embedded `lean_object*` pointers (usually a no-op for pure Rust data). +`ExternalClass::register` calls `lean_register_external_class`, which allocates +a class descriptor with two function pointers: a **finalizer** called when the +object's refcount reaches zero to free the Rust data, and a **foreach** callback +for Lean to traverse any embedded `lean_object*` pointers (usually a no-op for +pure Rust data). `register_with_drop::()` generates a finalizer that calls `drop(Box::from_raw(ptr.cast::()))` and a no-op foreach — sufficient for any Rust type that doesn't hold Lean objects. -Registration must happen exactly once per type. `LazyLock` (or `OnceLock`) ensures -thread-safe one-time initialization, storing the returned `ExternalClass` in a -`static` for reuse across all allocations: +Registration must happen exactly once per type. `LazyLock` (or `OnceLock`) +ensures thread-safe one-time initialization, storing the returned +`ExternalClass` in a `static` for reuse across all allocations: ```rust use std::sync::LazyLock; @@ -190,7 +231,7 @@ static HASHER_CLASS: LazyLock = ``` **Create** — `LeanExternal::alloc` boxes the value and returns an owned -external object: +reference to the external object: ```rust // Lean: @[extern "rs_hasher_new"] opaque Hasher.new : Unit → Hasher @@ -214,9 +255,9 @@ extern "C" fn rs_hasher_bytes( ``` **Update** — `.get_mut()` returns `Option<&mut T>`, which is `Some` when the -object is exclusively owned (`m_rc == 1`). This enables -in-place mutation without allocating a new external object. When shared `.get_mut()` -returns `None` and instead clones into a new object on write. +object is exclusively owned (`m_rc == 1`). This enables in-place mutation +without allocating a new external object. When shared `.get_mut()` returns +`None` and instead clones into a new object on write. ```rust // Lean: @[extern "rs_hasher_update"] opaque Hasher.update : Hasher → @& ByteArray → Hasher @@ -242,15 +283,14 @@ extern "C" fn rs_hasher_update( - `LeanExternal` — `Drop` calls `lean_dec`. When the refcount reaches zero, Lean calls the class finalizer, which (via `register_with_drop`) runs `drop(Box::from_raw(ptr))` to free the Rust value. -- `LeanExternal>` — no refcount changes, no cleanup. - Use for `@&` parameters. +- `LeanExternal>` — no refcount changes, no cleanup. Use for + `@&` parameters. - Converting to `LeanOwned` (e.g. to store in a ctor field): call `.into()`. - ### FFI function signatures -Use domain types in `extern "C"` function signatures. The ownership type parameter -tells Rust how to handle reference counting: +Use domain types in `extern "C"` function signatures. The ownership type +parameter tells Rust how to handle reference counting: ```rust // Lean: @[extern "process"] def process (xs : @& Array Nat) (n : Nat) : Array Nat @@ -264,42 +304,43 @@ extern "C" fn process( ``` More examples can be found in `src/test_ffi.rs` (Rust FFI implementations) and -`Tests/FFI.lean` (Lean declarations and tests), covering all domain types, scalar -field layouts, external objects, in-place mutation, and ownership patterns. +`Tests/FFI.lean` (Lean declarations and tests), covering all domain types, +scalar field layouts, external objects, in-place mutation, and ownership +patterns. ## In-Place Mutation -Lean's runtime supports in-place mutation when an object is **exclusively owned** -(`m_rc == 1`, single-threaded mode). When shared, the object is copied first. -`LeanRef::is_exclusive()` exposes this check. +Lean's runtime supports in-place mutation when an object is **exclusively +owned** (`m_rc == 1`, single-threaded mode). When shared, the object is copied +first. `LeanRef::is_exclusive()` exposes this check. These methods consume `self` and return a (possibly new) object, mutating in place when exclusive or copying first when shared: ### `LeanArray` -| Method | C equivalent | Description | -|--------|--------------|-------------| +| Method | C equivalent | Description | +| -------------------- | --------------------- | ------------------------------------------------------------------ | | `set(&self, i, val)` | `lean_array_set_core` | Set element (asserts exclusive — use for freshly allocated arrays) | -| `uset(self, i, val)` | `lean_array_uset` | Set element (copies if shared) | -| `push(self, val)` | `lean_array_push` | Append an element | -| `pop(self)` | `lean_array_pop` | Remove the last element | -| `uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | +| `uset(self, i, val)` | `lean_array_uset` | Set element (copies if shared) | +| `push(self, val)` | `lean_array_push` | Append an element | +| `pop(self)` | `lean_array_pop` | Remove the last element | +| `uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | ### `LeanByteArray` -| Method | C equivalent | Description | -|--------|--------------|-------------| +| Method | C equivalent | Description | +| ----------------------- | --------------------------- | ----------------------------------------------------------------- | | `set_data(&self, data)` | `lean_sarray_cptr` + memcpy | Bulk write (asserts exclusive — use for freshly allocated arrays) | -| `uset(self, i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | -| `push(self, val)` | `lean_byte_array_push` | Append a byte | -| `copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | +| `uset(self, i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | +| `push(self, val)` | `lean_byte_array_push` | Append a byte | +| `copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | ### `LeanString` -| Method | C equivalent | Description | -|--------|--------------|-------------| -| `push(self, c)` | `lean_string_push` | Append a UTF-32 character | +| Method | C equivalent | Description | +| --------------------- | -------------------- | ------------------------------------- | +| `push(self, c)` | `lean_string_push` | Append a UTF-32 character | | `append(self, other)` | `lean_string_append` | Concatenate another string (borrowed) | `LeanExternal` also supports in-place mutation via `get_mut()` — see the @@ -309,7 +350,11 @@ place when exclusive or copying first when shared: ### Rust panic behavior -By default, Rust uses stack unwinding for panics. If a panic occurs in a Lean-to-Rust FFI function, the unwinding will try to cross the FFI boundary back into Lean. This is [undefined behavior](https://doc.rust-lang.org/stable/reference/panic.html#unwinding-across-ffi-boundaries). To avoid this, configure Rust to abort on panic in `Cargo.toml`: +By default, Rust uses stack unwinding for panics. If a panic occurs in a +Lean-to-Rust FFI function, the unwinding will try to cross the FFI boundary back +into Lean. This is +[undefined behavior](https://doc.rust-lang.org/stable/reference/panic.html#unwinding-across-ffi-boundaries). +To avoid this, configure Rust to abort on panic in `Cargo.toml`: ```toml [profile.release] @@ -320,22 +365,15 @@ panic = "abort" Lean passes simple enums (inductives where all constructors have zero fields, e.g. `DefKind`, `QuotKind`) as **raw unboxed tag values** (`0`, `1`, `2`, ...) -across the FFI boundary, not as `lean_box(tag)`. Use `LeanOwned::from_enum_tag()` -and `LeanRef::as_enum_tag()` for these. - -### Persistent objects - -Module-level Lean definitions and objects in compact regions are persistent -(`m_rc == 0`). Both `lean_inc_ref` and `lean_dec_ref` are no-ops for persistent -objects, so `LeanOwned`, `LeanBorrowed`, `Clone`, and `Drop` all work correctly -without special handling. +across the FFI boundary, not as `lean_box(tag)`. Use +`LeanOwned::from_enum_tag()` and `LeanRef::as_enum_tag()` for these. ### `lean_string_size` vs `lean_string_byte_size` `lean_string_byte_size` returns the **total object memory size** -(`sizeof(lean_string_object) + capacity`), not the string data length. -Use `lean_string_size` instead, which returns `m_size` — the number of data -bytes including the NUL terminator. `LeanString` wraps these correctly: +(`sizeof(lean_string_object) + capacity`), not the string data length. Use +`lean_string_size` instead, which returns `m_size` — the number of data bytes +including the NUL terminator. `LeanString` wraps these correctly: - `byte_len()` — data bytes excluding NUL (`m_size - 1`) - `length()` — UTF-8 character count (`m_length`) diff --git a/src/nat.rs b/src/nat.rs index 61fa619..4c5a406 100644 --- a/src/nat.rs +++ b/src/nat.rs @@ -58,7 +58,7 @@ impl Nat { self.0.to_bytes_le() } - /// Convert this `Nat` into a Lean `Nat` object (always owned). + /// Convert this `Nat` into a Lean `Nat` (returns an owned reference). pub fn to_lean(&self) -> LeanNat { // Try to get as u64 first if let Some(val) = self.to_u64() { diff --git a/src/object.rs b/src/object.rs index 23afdcf..003ef18 100644 --- a/src/object.rs +++ b/src/object.rs @@ -37,7 +37,7 @@ use tags::*; const IO_ERROR_USER_ERROR_TAG: u8 = 7; // ============================================================================= -// LeanRef trait — shared interface for owned and borrowed pointers +// LeanRef trait — shared interface for owned and borrowed references // ============================================================================= // // lean.h base object header (8 bytes): @@ -86,17 +86,6 @@ pub trait LeanRef: Clone { !self.is_scalar() && unsafe { include::lean_is_persistent(self.as_raw()) } } - /// Produce an owned copy by incrementing the reference count. - /// Safe for persistent objects (m_rc == 0) — `lean_inc_ref` is a no-op when `m_rc == 0`. - #[inline] - fn to_owned_ref(&self) -> LeanOwned { - let ptr = self.as_raw(); - if ptr as usize & 1 != 1 { - unsafe { include::lean_inc_ref(ptr) }; - } - LeanOwned(ptr) - } - /// Unbox a tagged scalar pointer into a `usize`. #[inline] fn unbox_usize(&self) -> usize { @@ -141,16 +130,31 @@ pub trait LeanRef: Clone { } // ============================================================================= -// LeanOwned — Owned Lean object pointer (RAII) +// LeanOwned — Owned reference to a Lean object (RAII) // ============================================================================= -/// Owned reference to a Lean object. +/// An owned reference to a Lean object, in the sense of +/// [Counting Immutable Beans](https://arxiv.org/abs/1908.05647): the holder +/// of an owned reference must call `lean_dec` exactly once. +/// +/// In the Lean C API, owned and borrowed references are both raw `lean_object*` +/// pointers — the distinction is purely a calling convention: +/// an owned reference (`lean_obj_arg`) means the recipient must call `lean_dec` +/// exactly once, while a borrowed reference (`b_lean_obj_arg`) means they must +/// not. Calling `lean_inc` does not create a new pointer; it increments `m_rc` +/// on the existing one. The reference count tracks how many `lean_dec` calls +/// remain before the object is freed. +/// +/// `LeanOwned` wraps a raw `lean_object*` with RAII semantics. **Every +/// `LeanOwned` value will call `lean_dec` exactly once when dropped.** `Clone` +/// calls `lean_inc` to balance the additional `lean_dec` from the new value's +/// `Drop`, and returns a second `LeanOwned` to the same object. /// -/// - `Drop` calls `lean_dec` (with scalar check). -/// - `Clone` calls `lean_inc`. -/// - **Not `Copy`** — ownership is linear. +/// Not `Copy` — passing or assigning a `LeanOwned` moves it (transferring the +/// `lean_dec`); use `.clone()` to create a second owned reference via `lean_inc`. /// -/// Corresponds to `lean_obj_arg` (received) and `lean_obj_res` (returned via repr(transparent)). +/// Corresponds to `lean_obj_arg` (received) and `lean_obj_res` (returned via +/// repr(transparent)). #[repr(transparent)] pub struct LeanOwned(*mut include::lean_object); @@ -202,7 +206,14 @@ impl LeanOwned { /// Consume this wrapper without calling `lean_dec`. /// - /// Use when transferring ownership back to Lean (returning `lean_obj_res`). + /// Use when passing ownership to a Lean C API function that takes + /// `lean_obj_arg` (which will `lean_dec` internally). Without this, + /// both the C function and Rust's `Drop` would `lean_dec`, causing a + /// double-free. + /// + /// Not needed for returning values from `extern "C"` FFI functions — + /// returning a `LeanOwned` directly works because Rust does not call + /// `Drop` on return values. #[inline] pub fn into_raw(self) -> *mut include::lean_object { let ptr = self.0; @@ -300,6 +311,20 @@ impl<'a> LeanBorrowed<'a> { pub unsafe fn from_raw(ptr: *mut include::lean_object) -> Self { Self(ptr, PhantomData) } + + /// Promote this borrowed reference to an owned reference. + /// + /// Calls `lean_inc_ref` to account for the `lean_dec` that the returned + /// [`LeanOwned`]'s `Drop` will perform. + /// No-op for tagged scalars (bit 0 set) and persistent objects (`m_rc == 0`). + #[inline] + pub fn to_owned_ref(&self) -> LeanOwned { + let ptr = self.as_raw(); + if ptr as usize & 1 != 1 { + unsafe { include::lean_inc_ref(ptr) }; + } + LeanOwned(ptr) + } } // ============================================================================= @@ -1749,7 +1774,7 @@ impl From for LeanOwned { // Convenience: as_ctor / as_string / as_array / as_list / as_byte_array // ============================================================================= -/// Helper methods for interpreting a reference as a specific domain type (borrowed view). +/// Helper methods for interpreting a borrowed reference as a specific domain type. impl<'a> LeanBorrowed<'a> { /// Interpret as a constructor object. #[inline] @@ -1788,10 +1813,10 @@ impl<'a> LeanBorrowed<'a> { } // ============================================================================= -// LeanShared — Thread-safe owned Lean object +// LeanShared — Thread-safe owned reference to a Lean object // ============================================================================= -/// Thread-safe owned Lean object with atomic refcounting. +/// Thread-safe owned reference to a Lean object, with atomic refcounting. /// /// Lean objects track refcounts in `m_rc`: /// - `m_rc > 0` → single-threaded (ST): non-atomic inc/dec @@ -1823,7 +1848,7 @@ unsafe impl Send for LeanShared {} unsafe impl Sync for LeanShared {} impl LeanShared { - /// Mark an owned object's entire reachable graph as MT and take ownership. + /// Mark the object's entire reachable graph as MT and wrap as a shared reference. /// /// Persistent objects (`m_rc == 0`) and scalars are unaffected. /// After this call, all refcount operations on the object graph use diff --git a/src/test_ffi.rs b/src/test_ffi.rs index a298f32..f1c86e9 100644 --- a/src/test_ffi.rs +++ b/src/test_ffi.rs @@ -666,10 +666,10 @@ pub(crate) extern "C" fn rs_owned_scalar_sum(ptr: LeanScalarStruct) - // ============================================================================= /// Clone an owned array and return the sum of lengths of both copies. -/// Tests that Clone (lean_inc) produces a valid second handle. +/// Tests that Clone (lean_inc) produces a valid second owned reference. #[unsafe(no_mangle)] pub(crate) extern "C" fn rs_clone_array_len_sum(arr_ptr: LeanArray>) -> usize { - // Create an owned copy, then clone it + // Build an owned array, then clone it let owned: LeanArray = { let nats: Vec = arr_ptr.map(|b| Nat::from_obj(&b)); let arr = LeanArray::alloc(nats.len()); @@ -1035,7 +1035,7 @@ pub(crate) extern "C" fn rs_list_to_array_via_push( // ============================================================================= /// Take a borrowed Nat, convert to owned via to_owned_ref, return it. -/// Tests that to_owned_ref (lean_inc) produces a valid owned handle. +/// Tests that to_owned_ref (lean_inc) produces a valid owned reference. #[unsafe(no_mangle)] pub(crate) extern "C" fn rs_borrow_to_owned(nat: LeanNat>) -> LeanNat { LeanNat::new(nat.inner().to_owned_ref()) @@ -1208,10 +1208,10 @@ pub(crate) extern "C" fn rs_external_set_x( } // ============================================================================= -// External object: multiple field reads from same borrowed handle +// External object: multiple field reads from same borrowed reference // ============================================================================= -/// Read all fields from a single borrowed external handle and return as a string. +/// Read all fields from a single borrowed external reference and return as a string. /// Tests that multiple reads from a borrowed external don't corrupt state. #[unsafe(no_mangle)] pub(crate) extern "C" fn rs_external_all_fields( @@ -1393,7 +1393,7 @@ pub(crate) extern "C" fn rs_shared_parallel_read( ) -> LeanNat { use std::thread; - // Create an owned copy and mark as MT + // Promote to owned reference and mark as MT let shared = LeanShared::new(arr.inner().to_owned_ref()); let mut handles = Vec::new(); From a900ebc34c0be68c23df63f341446cd54e29a479 Mon Sep 17 00:00:00 2001 From: samuelburnham <45365069+samuelburnham@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:56:44 -0400 Subject: [PATCH 3/5] Cleanup --- README.md | 64 +++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 9b529d6..d9c5a5e 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,22 @@ # lean-ffi -Rust bindings to the `lean.h` Lean C FFI, generated with -[`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). Bindgen runs in -`build.rs` and generates unsafe Rust functions that link to Lean's `lean.h` C -library. This module can be found at -`target/release/build/lean-ffi-/out/lean.rs` after running -`cargo build --release`. - -These bindings are then wrapped in a typed Rust API that models Lean's reference -counting system for owned (`lean_obj_arg`) and borrowed (`b_lean_obj_arg`) -references. +A Rust library that wraps low-level bindings to the +[`lean.h`](https://github.com/leanprover/lean4/blob/master/src/include/lean/lean.h) +Lean C library with a high-level Rust API for safe and ergonomic FFI from Lean +to Rust. This allows the user to focus on the actual Rust logic rather than +manually manipulating pointers and keeping track of Lean reference counts. -## Ownership Model +The Rust bindings are auto-generated with +[`rust-bindgen`](https://github.com/rust-lang/rust-bindgen). Bindgen runs in +`build.rs` and generates unsafe Rust functions that link to `lean.h`. This +module can be found at `target/release/build/lean-ffi-/out/lean.rs` after +running `cargo build --release`. -### Background: Lean C API conventions +## Background: Lean Ownership Model In Lean's C API, a **reference** is a `lean_object*` pointer to the header of a -heap-allocated object. Note that a `lean_object*` can also refer to a tagged -scalar value encoded as a pointer-sized data type, where the low bit (tag) of -the pointer is set to 1. In that case it would not be called a reference. - -References in Lean can either be **owned** or **borrowed**. +heap-allocated object. References in Lean can either be **owned** or +**borrowed**. An **owned reference**, signified by `lean_obj_arg` in C, uses reference counting via the `int m_rc` field of the `lean_object` to determine when to free @@ -36,7 +32,11 @@ be kept alive as long as its parent. This enables the borrowed reference to dispense with reference counting altogether as it will get dropped when going out of scope. -### Rust API +> [!NOTE] A `lean_object*` can also refer to a tagged scalar value encoded as a +> pointer-sized data type, where the low bit (tag) of the pointer is set to 1. +> In that case it would not be called a reference. + +## `lean-ffi` Rust API In order to handle Lean reference counting gracefully in Rust, we use the following types: @@ -50,18 +50,18 @@ following types: - Passing or assigning a `LeanOwned` **moves** it (transferring the `lean_dec`); use `.clone()` to create a second owned reference via `lean_inc`. - - [`into_raw`] consumes the wrapper **without** calling `lean_dec`, for - passing ownership to Lean C API functions that take `lean_obj_arg` (which - will `lean_dec` internally). Not needed for returning values from - `extern "C"` functions — returning `LeanOwned` directly works because Rust - does not call `Drop` on return values. + - `into_raw` consumes the wrapper **without** calling `lean_dec`, for passing + ownership to Lean C API functions that take `lean_obj_arg` (which will + `lean_dec` internally). Not needed for returning values from `extern "C"` + functions — returning `LeanOwned` directly works because Rust does not call + `Drop` on return values. - Tagged scalar values (bit 0 set — small `Nat`, `Bool`, etc.) and persistent objects (`m_rc == 0`) skip refcount operations entirely. - **`LeanBorrowed<'a>`** — A borrowed reference. Corresponds to `b_lean_obj_arg` in the C FFI. Used when Lean declares a parameter with `@&`. - - The `Copy` and `Clone` implementations perform a trivial bitwise copy. Neither - `Clone` nor `Drop` modify the reference count. + - The `Copy` and `Clone` implementations perform a trivial bitwise copy. + Neither `Clone` nor `Drop` modify the reference count. - The lifetime `'a` ties the borrowed reference to the source reference's scope, preventing use-after-free. - Call `.to_owned_ref()` to promote to `LeanOwned` (calls `lean_inc`). @@ -82,7 +82,7 @@ following types: All reference types are safe for persistent objects and compact memory regions (`m_rc == 0`) — `lean_inc_ref` and `lean_dec_ref` are no-ops when `m_rc == 0`. -## Domain Types +### Domain Types Domain types wrap the ownership types to provide type safety at FFI boundaries. Built-in domain types include `LeanArray`, `LeanString`, `LeanCtor`, @@ -90,13 +90,13 @@ Built-in domain types include `LeanArray`, `LeanString`, `LeanCtor`, `LeanProd`, `LeanNat`, `LeanBool`, `LeanByteArray`, and `LeanExternal`. -### Naming convention +#### Naming convention Domain types are prefixed with `Lean` to distinguish them from Lean-side type names and to match the built-in types. For example, a Lean `Point` structure becomes `LeanPoint` in Rust. -### Defining custom domain types +#### Defining custom domain types Use the `lean_domain_type!` macro to define newtypes for your Lean types: @@ -308,7 +308,7 @@ More examples can be found in `src/test_ffi.rs` (Rust FFI implementations) and scalar field layouts, external objects, in-place mutation, and ownership patterns. -## In-Place Mutation +### In-Place Mutation Lean's runtime supports in-place mutation when an object is **exclusively owned** (`m_rc == 1`, single-threaded mode). When shared, the object is copied @@ -317,7 +317,7 @@ first. `LeanRef::is_exclusive()` exposes this check. These methods consume `self` and return a (possibly new) object, mutating in place when exclusive or copying first when shared: -### `LeanArray` +#### `LeanArray` | Method | C equivalent | Description | | -------------------- | --------------------- | ------------------------------------------------------------------ | @@ -327,7 +327,7 @@ place when exclusive or copying first when shared: | `pop(self)` | `lean_array_pop` | Remove the last element | | `uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | -### `LeanByteArray` +#### `LeanByteArray` | Method | C equivalent | Description | | ----------------------- | --------------------------- | ----------------------------------------------------------------- | @@ -336,7 +336,7 @@ place when exclusive or copying first when shared: | `push(self, val)` | `lean_byte_array_push` | Append a byte | | `copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | -### `LeanString` +#### `LeanString` | Method | C equivalent | Description | | --------------------- | -------------------- | ------------------------------------- | From da71b899aeb6caf384c64a07e36fefa4f4f74388 Mon Sep 17 00:00:00 2001 From: samuelburnham <45365069+samuelburnham@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:34:31 -0400 Subject: [PATCH 4/5] Address review --- README.md | 89 ++++++++++++++++++++++++++------------------------- src/object.rs | 23 +++++++++---- 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index d9c5a5e..f09dbe6 100644 --- a/README.md +++ b/README.md @@ -48,13 +48,13 @@ following types: reference to the same object. `Copy` is not implemented. - The `Drop` implementation calls `lean_dec` automatically on scope exit. - Passing or assigning a `LeanOwned` **moves** it (transferring the - `lean_dec`); use `.clone()` to create a second owned reference via + `lean_dec`); use `self.clone()` to create a second owned reference via `lean_inc`. - - `into_raw` consumes the wrapper **without** calling `lean_dec`, for passing - ownership to Lean C API functions that take `lean_obj_arg` (which will - `lean_dec` internally). Not needed for returning values from `extern "C"` - functions — returning `LeanOwned` directly works because Rust does not call - `Drop` on return values. + - `self.into_raw()` consumes the wrapper **without** calling `lean_dec`, for + passing ownership to Lean C API functions that take `lean_obj_arg` (which + will `lean_dec` internally). Not needed for returning values from + `extern "C"` functions — returning `LeanOwned` directly works because Rust + does not call `Drop` on return values. - Tagged scalar values (bit 0 set — small `Nat`, `Bool`, etc.) and persistent objects (`m_rc == 0`) skip refcount operations entirely. @@ -64,7 +64,7 @@ following types: Neither `Clone` nor `Drop` modify the reference count. - The lifetime `'a` ties the borrowed reference to the source reference's scope, preventing use-after-free. - - Call `.to_owned_ref()` to promote to `LeanOwned` (calls `lean_inc`). + - Call `self.to_owned_ref()` to promote to `LeanOwned` (calls `lean_inc`). - Note: The `b_lean_obj_res` type is used when returning a borrowed reference in C, but returning it and `LeanBorrowed` are only used internally as Lean expects owned references at the FFI boundary. @@ -76,11 +76,12 @@ following types: `into_owned()` to unwrap back to `LeanOwned`. - **`LeanRef`** — Trait implemented by `LeanOwned`, `LeanBorrowed`, and - `LeanShared`, providing shared read-only operations like `as_raw()`, - `is_scalar()`, `tag()`, and unboxing methods. + `LeanShared`, providing shared read-only operations like `self.as_raw()`, + `self.is_scalar()`, `self.tag()`, and unboxing methods. All reference types are safe for persistent objects and compact memory regions -(`m_rc == 0`) — `lean_inc_ref` and `lean_dec_ref` are no-ops when `m_rc == 0`. +(`m_rc == 0`) — `lean_inc_ref()` and `lean_dec_ref()` are no-ops when +`m_rc == 0`. ### Domain Types @@ -110,9 +111,9 @@ lean_ffi::lean_domain_type! { ``` This generates a `#[repr(transparent)]` wrapper with `Clone`, `Copy` for -`LeanBorrowed`, `inner()`, `as_raw()`, `into_raw()`, and `From` impls. You can -then add accessor methods — readers are generic over `R: LeanRef` (work on both -owned and borrowed), constructors return `LeanOwned`: +`LeanBorrowed`, `self.inner()`, `self.as_raw()`, `self.into_raw()`, and `From` +impls. You can then add accessor methods — readers are generic over `R: LeanRef` +(work on both owned and borrowed), constructors return `LeanOwned`: ```rust impl LeanPutResponse { @@ -206,13 +207,13 @@ sees an opaque type; Rust controls allocation, access, mutation, and cleanup. **Register** an external class exactly once, using `OnceLock` or `LazyLock`. -`ExternalClass::register` calls `lean_register_external_class`, which allocates -a class descriptor with two function pointers: a **finalizer** called when the -object's refcount reaches zero to free the Rust data, and a **foreach** callback -for Lean to traverse any embedded `lean_object*` pointers (usually a no-op for -pure Rust data). +`ExternalClass::register()` calls `lean_register_external_class`, which +allocates a class descriptor with two function pointers: a **finalizer** called +when the object's refcount reaches zero to free the Rust data, and a **foreach** +callback that `lean_mark_persistent` and `lean_mark_mt` use to traverse any +embedded `lean_object*` pointers (usually a no-op for pure Rust data). -`register_with_drop::()` generates a finalizer that calls +`ExternalClass::register_with_drop::()` generates a finalizer that calls `drop(Box::from_raw(ptr.cast::()))` and a no-op foreach — sufficient for any Rust type that doesn't hold Lean objects. @@ -230,7 +231,7 @@ static HASHER_CLASS: LazyLock = LazyLock::new(ExternalClass::register_with_drop::); ``` -**Create** — `LeanExternal::alloc` boxes the value and returns an owned +**Create** — `LeanExternal::alloc()` boxes the value and returns an owned reference to the external object: ```rust @@ -241,8 +242,8 @@ extern "C" fn rs_hasher_new(_unit: LeanOwned) -> LeanExternal } ``` -**Read** — `.get()` borrows the stored `&T`. Works on both owned and borrowed -references: +**Read** — `self.get()` borrows the stored `&T`. Works on both owned and +borrowed references: ```rust // Lean: @[extern "rs_hasher_bytes"] opaque Hasher.bytes : @& Hasher → ByteArray @@ -254,9 +255,9 @@ extern "C" fn rs_hasher_bytes( } ``` -**Update** — `.get_mut()` returns `Option<&mut T>`, which is `Some` when the +**Update** — `self.get_mut()` returns `Option<&mut T>`, which is `Some` when the object is exclusively owned (`m_rc == 1`). This enables in-place mutation -without allocating a new external object. When shared `.get_mut()` returns +without allocating a new external object. When shared `self.get_mut()` returns `None` and instead clones into a new object on write. ```rust @@ -319,29 +320,29 @@ place when exclusive or copying first when shared: #### `LeanArray` -| Method | C equivalent | Description | -| -------------------- | --------------------- | ------------------------------------------------------------------ | -| `set(&self, i, val)` | `lean_array_set_core` | Set element (asserts exclusive — use for freshly allocated arrays) | -| `uset(self, i, val)` | `lean_array_uset` | Set element (copies if shared) | -| `push(self, val)` | `lean_array_push` | Append an element | -| `pop(self)` | `lean_array_pop` | Remove the last element | -| `uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | +| Method | C equivalent | Description | +| ------------------------- | --------------------- | ------------------------------------------------------------------ | +| `self.set(&self, i, val)` | `lean_array_set_core` | Set element (asserts exclusive — use for freshly allocated arrays) | +| `self.uset(self, i, val)` | `lean_array_uset` | Set element (copies if shared) | +| `self.push(self, val)` | `lean_array_push` | Append an element | +| `self.pop(self)` | `lean_array_pop` | Remove the last element | +| `self.uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | #### `LeanByteArray` -| Method | C equivalent | Description | -| ----------------------- | --------------------------- | ----------------------------------------------------------------- | -| `set_data(&self, data)` | `lean_sarray_cptr` + memcpy | Bulk write (asserts exclusive — use for freshly allocated arrays) | -| `uset(self, i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | -| `push(self, val)` | `lean_byte_array_push` | Append a byte | -| `copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | +| Method | C equivalent | Description | +| ---------------------------- | --------------------------- | ----------------------------------------------------------------- | +| `self.set_data(&self, data)` | `lean_sarray_cptr` + memcpy | Bulk write (asserts exclusive — use for freshly allocated arrays) | +| `self.uset(self, i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | +| `self.push(self, val)` | `lean_byte_array_push` | Append a byte | +| `self.copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | #### `LeanString` -| Method | C equivalent | Description | -| --------------------- | -------------------- | ------------------------------------- | -| `push(self, c)` | `lean_string_push` | Append a UTF-32 character | -| `append(self, other)` | `lean_string_append` | Concatenate another string (borrowed) | +| Method | C equivalent | Description | +| -------------------------- | -------------------- | ------------------------------------- | +| `self.push(self, c)` | `lean_string_push` | Append a UTF-32 character | +| `self.append(self, other)` | `lean_string_append` | Concatenate another string (borrowed) | `LeanExternal` also supports in-place mutation via `get_mut()` — see the **Update** section under [External objects](#external-objects-leanexternalt-r). @@ -375,9 +376,9 @@ across the FFI boundary, not as `lean_box(tag)`. Use `lean_string_size` instead, which returns `m_size` — the number of data bytes including the NUL terminator. `LeanString` wraps these correctly: -- `byte_len()` — data bytes excluding NUL (`m_size - 1`) -- `length()` — UTF-8 character count (`m_length`) -- `as_str()` — view as `&str` +- `self.byte_len()` — data bytes excluding NUL (`m_size - 1`) +- `self.length()` — UTF-8 character count (`m_length`) +- `self.as_str()` — view as `&str` ## References diff --git a/src/object.rs b/src/object.rs index 003ef18..ac4158f 100644 --- a/src/object.rs +++ b/src/object.rs @@ -1,11 +1,11 @@ -//! Ownership-aware wrappers for Lean FFI object pointers. +//! Ownership-aware wrappers for Lean FFI references. //! -//! The two core pointer types are: +//! The two core reference types are: //! - [`LeanOwned`]: Owned reference. `Drop` calls `lean_dec`, `Clone` calls `lean_inc`. Not `Copy`. //! - [`LeanBorrowed`]: Borrowed reference. `Copy`, no `Drop`, lifetime-bounded. //! //! Domain types like [`LeanArray`], [`LeanCtor`], etc. are generic over `R: LeanRef`, -//! inheriting ownership semantics from the inner pointer type. +//! inheriting ownership semantics from the inner reference type. use std::marker::PhantomData; use std::mem::ManuallyDrop; @@ -272,7 +272,7 @@ impl LeanOwned { } // ============================================================================= -// LeanBorrowed — Borrowed Lean object pointer +// LeanBorrowed — Borrowed reference to a Lean object // ============================================================================= /// Borrowed reference to a Lean object. @@ -1188,7 +1188,7 @@ impl LeanExternal { } impl<'a, T> LeanExternal> { - /// Wrap a raw pointer as a borrowed external object. + /// Wrap a raw pointer as a borrowed reference to an external object. /// /// # Safety /// The pointer must be a valid Lean external object whose data pointer @@ -1215,6 +1215,11 @@ impl From> for LeanOwned { // ============================================================================= /// A registered Lean external class (wraps `lean_external_class*`). +/// +/// A "class" is a pair of function pointers (finalizer + foreach) shared by +/// all external objects of the same Rust type. Created once via +/// [`register`](Self::register) or [`register_with_drop`](Self::register_with_drop) +/// and stored in a `static`. pub struct ExternalClass(*mut include::lean_external_class); // Safety: the class pointer is initialized once and read-only thereafter. @@ -1226,7 +1231,10 @@ impl ExternalClass { /// /// # Safety /// The `finalizer` callback must correctly free the external data, and - /// `foreach` must correctly visit any Lean object references held by the data. + /// `foreach` must visit any `lean_object*` pointers held by the data so that + /// `lean_mark_persistent` and `lean_mark_mt` can traverse the full object + /// graph. Only called during persistent/MT marking, not during normal + /// deallocation. pub unsafe fn register( finalizer: include::lean_external_finalize_proc, foreach: include::lean_external_foreach_proc, @@ -1235,7 +1243,8 @@ impl ExternalClass { } /// Register a new external class that uses `Drop` to finalize `T` - /// and has no Lean object references to visit. + /// and provides a no-op foreach (suitable when `T` holds no `lean_object*` + /// pointers). pub fn register_with_drop() -> Self { unsafe extern "C" fn drop_finalizer(ptr: *mut std::ffi::c_void) { if !ptr.is_null() { From f860a163da34f54c6bc86649cca06118ed4653b4 Mon Sep 17 00:00:00 2001 From: samuelburnham <45365069+samuelburnham@users.noreply.github.com> Date: Fri, 27 Mar 2026 10:03:17 -0400 Subject: [PATCH 5/5] Address review --- README.md | 71 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index f09dbe6..2482799 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ A Rust library that wraps low-level bindings to the [`lean.h`](https://github.com/leanprover/lean4/blob/master/src/include/lean/lean.h) -Lean C library with a high-level Rust API for safe and ergonomic FFI from Lean -to Rust. This allows the user to focus on the actual Rust logic rather than +Lean C library with a high-level API for safe and ergonomic FFI from Lean to +Rust. This allows the user to focus on the actual Rust logic rather than manually manipulating pointers and keeping track of Lean reference counts. The Rust bindings are auto-generated with @@ -18,21 +18,24 @@ In Lean's C API, a **reference** is a `lean_object*` pointer to the header of a heap-allocated object. References in Lean can either be **owned** or **borrowed**. -An **owned reference**, signified by `lean_obj_arg` in C, uses reference -counting via the `int m_rc` field of the `lean_object` to determine when to free -the underlying object. Before a new reference to the object is created, the Lean -compiler inserts a `lean_inc` call to increment the ref count. When the -reference goes out of scope, the Lean compiler inserts a `lean_dec` call to -decrement the ref count. When `m_rc` reaches 0, the Lean runtime frees the -object. - -A **borrowed reference**, signified by `@&` in Lean and `b_lean_obj_arg` in C, -inherits the reference count of a surrounding owned reference, and is assumed to -be kept alive as long as its parent. This enables the borrowed reference to -dispense with reference counting altogether as it will get dropped when going -out of scope. - -> [!NOTE] A `lean_object*` can also refer to a tagged scalar value encoded as a +An **owned reference** is a `lean_object*` that participates in reference +counting via the `int m_rc` field. Before a new reference to the object is +created, the Lean compiler inserts a `lean_inc` call to increment the ref count. +When the reference goes out of scope, the Lean compiler inserts a `lean_dec` +call to decrement the ref count. When `m_rc` reaches 0, the Lean runtime frees +the object. In C the conventional type alias for an owned reference is +`lean_obj_arg` for function parameters and `lean_obj_res` for return values. + +A **borrowed reference**, signified by `@&` in a Lean function parameter, is a +`lean_object*` for which the compiler does not emit `lean_inc` or `lean_dec` +calls, relying on a surrounding owned reference to keep the object alive. This +is more efficient for cases when the object is known to outlive the borrowed +reference, e.g. reading a constructor field. In C the conventional type alias +for a borrowed reference is `b_lean_obj_arg` for function parameters and +`b_lean_obj_res` for return values. + +> [!NOTE] +> A `lean_object*` can also refer to a tagged scalar value encoded as a > pointer-sized data type, where the low bit (tag) of the pointer is set to 1. > In that case it would not be called a reference. @@ -320,29 +323,29 @@ place when exclusive or copying first when shared: #### `LeanArray` -| Method | C equivalent | Description | -| ------------------------- | --------------------- | ------------------------------------------------------------------ | -| `self.set(&self, i, val)` | `lean_array_set_core` | Set element (asserts exclusive — use for freshly allocated arrays) | -| `self.uset(self, i, val)` | `lean_array_uset` | Set element (copies if shared) | -| `self.push(self, val)` | `lean_array_push` | Append an element | -| `self.pop(self)` | `lean_array_pop` | Remove the last element | -| `self.uswap(self, i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | +| Method | C equivalent | Description | +| ------------------- | --------------------- | ------------------------------------------------------------------ | +| `self.set(i, val)` | `lean_array_set_core` | Set element (asserts exclusive — use for freshly allocated arrays) | +| `self.uset(i, val)` | `lean_array_uset` | Set element (copies if shared) | +| `self.push(val)` | `lean_array_push` | Append an element | +| `self.pop(self)` | `lean_array_pop` | Remove the last element | +| `self.uswap(i, j)` | `lean_array_uswap` | Swap elements at `i` and `j` | #### `LeanByteArray` -| Method | C equivalent | Description | -| ---------------------------- | --------------------------- | ----------------------------------------------------------------- | -| `self.set_data(&self, data)` | `lean_sarray_cptr` + memcpy | Bulk write (asserts exclusive — use for freshly allocated arrays) | -| `self.uset(self, i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | -| `self.push(self, val)` | `lean_byte_array_push` | Append a byte | -| `self.copy(self)` | `lean_copy_byte_array` | Deep copy into a new exclusive array | +| Method | C equivalent | Description | +| --------------------- | --------------------------- | ----------------------------------------------------------------- | +| `self.set_data(data)` | `lean_sarray_cptr` + memcpy | Bulk write (asserts exclusive — use for freshly allocated arrays) | +| `self.uset(i, val)` | `lean_byte_array_uset` | Set byte (copies if shared) | +| `self.push(val)` | `lean_byte_array_push` | Append a byte | +| `self.copy()` | `lean_copy_byte_array` | Deep copy into a new exclusive array | #### `LeanString` -| Method | C equivalent | Description | -| -------------------------- | -------------------- | ------------------------------------- | -| `self.push(self, c)` | `lean_string_push` | Append a UTF-32 character | -| `self.append(self, other)` | `lean_string_append` | Concatenate another string (borrowed) | +| Method | C equivalent | Description | +| -------------------- | -------------------- | ------------------------------------- | +| `self.push(c)` | `lean_string_push` | Append a UTF-32 character | +| `self.append(other)` | `lean_string_append` | Concatenate another string (borrowed) | `LeanExternal` also supports in-place mutation via `get_mut()` — see the **Update** section under [External objects](#external-objects-leanexternalt-r).